/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};
typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if the state of the upper 128bits of AVX registers is
     unchanged in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
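/* The per-block records above are stashed in each basic_block's AUX
   field; they are allocated by alloc_aux_for_blocks and released by
   free_aux_for_blocks in move_or_delete_vzeroupper below.  */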
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return or pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
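/* These values travel as the immediate operand of the vzeroupper
   UNSPEC_VOLATILE pattern and are read back below with
   INTVAL (XVECEXP (pat, 0, 0)) when the pattern is processed.  */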
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
        = (enum upper_128bits_state *) data;
      *state = used;
    }
}
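/* This is the callback handed to note_stores in
   move_or_delete_vzeroupper_2 below: any store that sets (or copies
   from) a register in a 256bit AVX mode forces the tracked state to
   "used".  */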
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
                             enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
                 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
                 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              state = unused;
              unchanged = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (state != used)
            {
              note_stores (pat, check_avx256_stores, &state);
              if (state == used)
                unchanged = false;
            }
          continue;
        }

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
          if (avx256 == callee_return_avx256)
            {
              state = used;
              unchanged = false;
            }

          /* Remove unnecessary vzeroupper since upper 128bits are
             cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          /* Set state to UNUSED if callee doesn't return 256bit AVX
             register.  */
          if (avx256 != callee_return_pass_avx256)
            state = unused;

          if (avx256 == callee_return_pass_avx256
              || avx256 == callee_pass_avx256)
            {
              /* Must remove vzeroupper since callee passes in 256bit
                 AVX register.  */
              if (dump_file)
                {
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
              delete_insn (insn);
            }
          else
            {
              vzeroupper_insn = insn;
              unchanged = false;
            }
        }
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
             bb->index, unchanged ? "unchanged" : "changed",
             state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
             block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
        continue;
      switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
          if (!unknown_is_unused)
            seen_unknown = true;
          break;
        case used:
          state = used;
          break;
        default:
          break;
        }
    }

  if (seen_unknown)
    state = unknown;

  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
        cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);
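  /* bb_order[] now maps a block index to that block's position in the
     reverse completion order; used as the fibheap key below, it makes
     fibheap_extract_min hand blocks back in that order.  */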
  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
        move_or_delete_vzeroupper_1 (bb, false);
        fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
        {
          bb = (basic_block) fibheap_extract_min (worklist);
          RESET_BIT (in_worklist, bb->index);
          gcc_assert (!TEST_BIT (visited, bb->index));
          if (!TEST_BIT (visited, bb->index))
            {
              edge_iterator ei;

              SET_BIT (visited, bb->index);

              if (move_or_delete_vzeroupper_1 (bb, false))
                FOR_EACH_EDGE (e, ei, bb->succs)
                  {
                    if (e->dest == EXIT_BLOCK_PTR
                        || BLOCK_INFO (e->dest)->processed)
                      continue;

                    if (TEST_BIT (visited, e->dest->index))
                      {
                        if (!TEST_BIT (in_pending, e->dest->index))
                          {
                            /* Send E->DEST to next round.  */
                            SET_BIT (in_pending, e->dest->index);
                            fibheap_insert (pending,
                                            bb_order[e->dest->index],
                                            e->dest);
                          }
                      }
                    else if (!TEST_BIT (in_worklist, e->dest->index))
                      {
                        /* Add E->DEST to current round.  */
                        SET_BIT (in_worklist, e->dest->index);
                        fibheap_insert (worklist, bb_order[e->dest->index],
                                        e->dest);
                      }
                  }
            }
        }

      if (!cfun->machine->rescan_vzeroupper_p)
        break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
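/* Since COSTS_N_INSNS (N) is (N) * 4, COSTS_N_BYTES (2) equals
   COSTS_N_INSNS (1): a 2-byte add in the size-tuned table costs the
   same, on the common scale, as a single-insn add in the speed-tuned
   tables.  */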
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
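/* The memcpy/memset entries in each cost table below hold a pair of
   stringop_algs, one for 32bit mode and one for 64bit mode;
   DUMMY_STRINGOP_ALGS fills the 64bit slot for CPUs that are only
   tuned in 32bit mode.  */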
static const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),            /* cost of an add instruction */
  COSTS_N_BYTES (3),            /* cost of a lea instruction */
  COSTS_N_BYTES (2),            /* variable shift costs */
  COSTS_N_BYTES (3),            /* constant shift costs */
  {COSTS_N_BYTES (3),           /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),           /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  COSTS_N_BYTES (3),            /* cost of movsx */
  COSTS_N_BYTES (3),            /* cost of movzx */
  0,                            /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {2, 2, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 2},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {2, 2, 2},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  3,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {3, 3},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  3,                            /* cost of moving SSE register */
  {3, 3, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {3, 3, 3},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_BYTES (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),            /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),            /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),            /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar_load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  1,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  1,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (6),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),           /* HI */
   COSTS_N_INSNS (6),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  COSTS_N_INSNS (1),            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (23),          /* SI */
   COSTS_N_INSNS (23),          /* DI */
   COSTS_N_INSNS (23)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (23),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),          /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar_load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (12),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),          /* HI */
   COSTS_N_INSNS (12),          /* SI */
   COSTS_N_INSNS (12),          /* DI */
   COSTS_N_INSNS (12)},         /* other */
  1,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),          /* HI */
   COSTS_N_INSNS (40),          /* SI */
   COSTS_N_INSNS (40),          /* DI */
   COSTS_N_INSNS (40)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  4,                            /* size of l1 cache.  486 has 8kB cache
                                   shared for code and data, so 4kB is
                                   not really precise.  */
  4,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),           /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar_load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (11),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),          /* HI */
   COSTS_N_INSNS (11),          /* SI */
   COSTS_N_INSNS (11),          /* DI */
   COSTS_N_INSNS (11)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),          /* HI */
   COSTS_N_INSNS (25),          /* SI */
   COSTS_N_INSNS (25),          /* DI */
   COSTS_N_INSNS (25)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  8,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  8,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar_load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (4)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),          /* HI */
   COSTS_N_INSNS (17),          /* SI */
   COSTS_N_INSNS (17),          /* DI */
   COSTS_N_INSNS (17)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache */
  32,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in
     the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar_load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (2),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (7),           /* SI */
   COSTS_N_INSNS (7),           /* DI */
   COSTS_N_INSNS (7)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (39),          /* SI */
   COSTS_N_INSNS (39),          /* DI */
   COSTS_N_INSNS (39)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  1,                            /* cost for loading QImode using movzbl */
  {1, 1, 1},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {1, 1, 1},                    /* cost of storing integer registers */
  1,                            /* cost of reg,reg fld/fst */
  {1, 1, 1},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 6, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */

  1,                            /* cost of moving MMX register */
  {1, 1},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {1, 1},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  1,                            /* cost of moving SSE register */
  {1, 1, 1},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {1, 1, 1},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  1,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  128,                          /* size of l2 cache.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar_load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (3),           /* DI */
   COSTS_N_INSNS (3)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),          /* HI */
   COSTS_N_INSNS (18),          /* SI */
   COSTS_N_INSNS (18),          /* DI */
   COSTS_N_INSNS (18)},         /* other */
  COSTS_N_INSNS (2),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  3,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  6,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  32,                           /* size of l2 cache.  Some models
                                   have integrated l2 cache, but
                                   optimizing for k6 is not important
                                   enough to worry about that.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  COSTS_N_INSNS (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar_load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (5),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),           /* HI */
   COSTS_N_INSNS (5),           /* SI */
   COSTS_N_INSNS (5),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  5,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar_load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 3, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                          /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instructions for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar_load_cost.  */
  2,                            /* scalar_store_cost.  */
  5,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  3,                            /* vec_unalign_load_cost.  */
  3,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  2,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instructions for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar_load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instructions for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar_load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */

  /* BDVER2 has optimized REP instructions for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar_load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  32,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instructions for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar_load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (3),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (4),            /* constant shift costs */
  {COSTS_N_INSNS (15),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),          /* HI */
   COSTS_N_INSNS (15),          /* SI */
   COSTS_N_INSNS (15),          /* DI */
   COSTS_N_INSNS (15)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),          /* HI */
   COSTS_N_INSNS (56),          /* SI */
   COSTS_N_INSNS (56),          /* DI */
   COSTS_N_INSNS (56)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  12,                           /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  10,                           /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (5),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar_load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (10),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),          /* HI */
   COSTS_N_INSNS (10),          /* SI */
   COSTS_N_INSNS (10),          /* DI */
   COSTS_N_INSNS (10)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),          /* HI */
   COSTS_N_INSNS (66),          /* SI */
   COSTS_N_INSNS (66),          /* DI */
   COSTS_N_INSNS (66)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  3,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  6,                            /* cost of moving MMX register */
  {12, 12},                     /* cost of loading MMX registers
                                   in SImode and DImode */
  {12, 12},                     /* cost of storing MMX registers
                                   in SImode and DImode */
  6,                            /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {12, 12, 12},                 /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  8,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  1024,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  8,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar_load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (2)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {8, 8, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {8, 8, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar_load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
1728 /* Generic64 should produce code tuned for Nocona and K8. */
1730 struct processor_costs generic64_cost = {
1731 COSTS_N_INSNS (1), /* cost of an add instruction */
1732 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1733 this cost, however, our current implementation of synth_mult results in
1734 the use of unnecessary temporary registers, causing regressions on several
1735 SPECfp benchmarks. */
1736 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1737 COSTS_N_INSNS (1), /* variable shift costs */
1738 COSTS_N_INSNS (1), /* constant shift costs */
1739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1740 COSTS_N_INSNS (4), /* HI */
1741 COSTS_N_INSNS (3), /* SI */
1742 COSTS_N_INSNS (4), /* DI */
1743 COSTS_N_INSNS (2)}, /* other */
1744 0, /* cost of multiply per each bit set */
1745 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1746 COSTS_N_INSNS (26), /* HI */
1747 COSTS_N_INSNS (42), /* SI */
1748 COSTS_N_INSNS (74), /* DI */
1749 COSTS_N_INSNS (74)}, /* other */
1750 COSTS_N_INSNS (1), /* cost of movsx */
1751 COSTS_N_INSNS (1), /* cost of movzx */
1752 8, /* "large" insn */
1753 17, /* MOVE_RATIO */
1754 4, /* cost for loading QImode using movzbl */
1755 {4, 4, 4}, /* cost of loading integer registers
1756 in QImode, HImode and SImode.
1757 Relative to reg-reg move (2). */
1758 {4, 4, 4}, /* cost of storing integer registers */
1759 4, /* cost of reg,reg fld/fst */
1760 {12, 12, 12}, /* cost of loading fp registers
1761 in SFmode, DFmode and XFmode */
1762 {6, 6, 8}, /* cost of storing fp registers
1763 in SFmode, DFmode and XFmode */
1764 2, /* cost of moving MMX register */
1765 {8, 8}, /* cost of loading MMX registers
1766 in SImode and DImode */
1767 {8, 8}, /* cost of storing MMX registers
1768 in SImode and DImode */
1769 2, /* cost of moving SSE register */
1770 {8, 8, 8}, /* cost of loading SSE registers
1771 in SImode, DImode and TImode */
1772 {8, 8, 8}, /* cost of storing SSE registers
1773 in SImode, DImode and TImode */
1774 5, /* MMX or SSE register to integer */
1775 32, /* size of l1 cache. */
1776 512, /* size of l2 cache. */
1777 64, /* size of prefetch block */
1778 6, /* number of parallel prefetches */
1779 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1780 value is increased to the perhaps more appropriate value of 5. */
1781 3, /* Branch cost */
1782 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1783 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1784 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1785 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1786 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1787 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1788 {DUMMY_STRINGOP_ALGS,
1789 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1790 {DUMMY_STRINGOP_ALGS,
1791 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1792 1, /* scalar_stmt_cost. */
1793 1, /* scalar load_cost. */
1794 1, /* scalar_store_cost. */
1795 1, /* vec_stmt_cost. */
1796 1, /* vec_to_scalar_cost. */
1797 1, /* scalar_to_vec_cost. */
1798 1, /* vec_align_load_cost. */
1799 2, /* vec_unalign_load_cost. */
1800 1, /* vec_store_cost. */
1801 3, /* cond_taken_branch_cost. */
1802 1, /* cond_not_taken_branch_cost. */
1805 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona, Core 2, Athlon and K8. */
1808 struct processor_costs generic32_cost = {
1809 COSTS_N_INSNS (1), /* cost of an add instruction */
1810 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1811 COSTS_N_INSNS (1), /* variable shift costs */
1812 COSTS_N_INSNS (1), /* constant shift costs */
1813 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1814 COSTS_N_INSNS (4), /* HI */
1815 COSTS_N_INSNS (3), /* SI */
1816 COSTS_N_INSNS (4), /* DI */
1817 COSTS_N_INSNS (2)}, /* other */
1818 0, /* cost of multiply per each bit set */
1819 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1820 COSTS_N_INSNS (26), /* HI */
1821 COSTS_N_INSNS (42), /* SI */
1822 COSTS_N_INSNS (74), /* DI */
1823 COSTS_N_INSNS (74)}, /* other */
1824 COSTS_N_INSNS (1), /* cost of movsx */
1825 COSTS_N_INSNS (1), /* cost of movzx */
1826 8, /* "large" insn */
1827 17, /* MOVE_RATIO */
1828 4, /* cost for loading QImode using movzbl */
1829 {4, 4, 4}, /* cost of loading integer registers
1830 in QImode, HImode and SImode.
1831 Relative to reg-reg move (2). */
1832 {4, 4, 4}, /* cost of storing integer registers */
1833 4, /* cost of reg,reg fld/fst */
1834 {12, 12, 12}, /* cost of loading fp registers
1835 in SFmode, DFmode and XFmode */
1836 {6, 6, 8}, /* cost of storing fp registers
1837 in SFmode, DFmode and XFmode */
1838 2, /* cost of moving MMX register */
1839 {8, 8}, /* cost of loading MMX registers
1840 in SImode and DImode */
1841 {8, 8}, /* cost of storing MMX registers
1842 in SImode and DImode */
1843 2, /* cost of moving SSE register */
1844 {8, 8, 8}, /* cost of loading SSE registers
1845 in SImode, DImode and TImode */
1846 {8, 8, 8}, /* cost of storing SSE registers
1847 in SImode, DImode and TImode */
1848 5, /* MMX or SSE register to integer */
1849 32, /* size of l1 cache. */
1850 256, /* size of l2 cache. */
1851 64, /* size of prefetch block */
1852 6, /* number of parallel prefetches */
1853 3, /* Branch cost */
1854 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1855 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1856 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1857 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1858 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1859 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1860 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1861 DUMMY_STRINGOP_ALGS},
1862 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1863 DUMMY_STRINGOP_ALGS},
1864 1, /* scalar_stmt_cost. */
1865 1, /* scalar load_cost. */
1866 1, /* scalar_store_cost. */
1867 1, /* vec_stmt_cost. */
1868 1, /* vec_to_scalar_cost. */
1869 1, /* scalar_to_vec_cost. */
1870 1, /* vec_align_load_cost. */
1871 2, /* vec_unalign_load_cost. */
1872 1, /* vec_store_cost. */
1873 3, /* cond_taken_branch_cost. */
1874 1, /* cond_not_taken_branch_cost. */
1877 const struct processor_costs *ix86_cost = &pentium_cost;
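/* Illustrative sketch (hypothetical helper, not part of the original
   file): the tables above are consulted through the ix86_cost pointer,
   and all COSTS_N_INSNS-based entries share the same rtx-cost units,
   so direct comparisons between them are meaningful.  */

static inline bool
example_lea_cheaper_than_add (void)
{
  /* For atom_cost, lea is COSTS_N_INSNS (1) + 1 while add is
     COSTS_N_INSNS (1), so this returns false when tuning for Atom.  */
  return ix86_cost->lea < ix86_cost->add;
}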
1879 /* Processor feature/optimization bitmasks. */
1880 #define m_386 (1<<PROCESSOR_I386)
1881 #define m_486 (1<<PROCESSOR_I486)
1882 #define m_PENT (1<<PROCESSOR_PENTIUM)
1883 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1884 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1885 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1886 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1887 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1888 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1889 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1890 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1891 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1892 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1893 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1894 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1895 #define m_ATOM (1<<PROCESSOR_ATOM)
1897 #define m_GEODE (1<<PROCESSOR_GEODE)
1898 #define m_K6 (1<<PROCESSOR_K6)
1899 #define m_K6_GEODE (m_K6 | m_GEODE)
1900 #define m_K8 (1<<PROCESSOR_K8)
1901 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1902 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1903 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1904 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1905 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1906 #define m_BDVER (m_BDVER1 | m_BDVER2)
1907 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1908 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1910 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1911 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1913 /* Generic instruction choice should be a common subset of supported CPUs
1914 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1915 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
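/* Illustrative sketch (hypothetical helper, not part of the original
   file): each m_* macro above is a bitmask over enum processor_type,
   so membership in a tuning group is a plain bitwise AND.  */

static inline bool
example_in_amd_group (enum processor_type cpu)
{
  return ((1U << (int) cpu) & m_AMD_MULTIPLE) != 0;
}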
1917 /* Feature tests against the various tunings. */
1918 unsigned char ix86_tune_features[X86_TUNE_LAST];
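/* Sketch of how these feature flags are consumed elsewhere (for
   illustration only; the real TARGET_* accessor macros live in i386.h
   and may differ in detail):

     #define TARGET_USE_LEAVE	ix86_tune_features[X86_TUNE_USE_LEAVE]

   so once the array below has been filled in, code can simply test
   `if (TARGET_USE_LEAVE) ...'.  */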
1920 /* Feature tests against the various tunings used to create ix86_tune_features
1921 based on the processor mask. */
1922 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1923 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1924 negatively, so enabling it for Generic64 seems like a good code-size
1925 tradeoff. We can't enable it for 32bit generic because it does not
1926 work well with PPro based chips. */
1927 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1929 /* X86_TUNE_PUSH_MEMORY */
1930 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1932 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1935 /* X86_TUNE_UNROLL_STRLEN */
1936 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1938 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1939 on simulation results. But after P4 was made, no performance benefit
1940 was observed with branch hints. They also increase code size.
1941 As a result, icc never generates branch hints. */
1944 /* X86_TUNE_DOUBLE_WITH_ADD */
1947 /* X86_TUNE_USE_SAHF */
1948 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1950 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1951 partial dependencies. */
1952 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1954 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1955 register stalls on the Generic32 compilation setting as well. However,
1956 in the current implementation the partial register stalls are not eliminated
1957 very well - they can be introduced via subregs synthesized by combine
1958 and can happen in caller/callee saving sequences. Because this option
1959 pays back little on PPro based chips and is in conflict with the partial reg
1960 dependencies used by Athlon/P4 based chips, it is better to leave it off
1961 for generic32 for now. */
1964 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1965 m_CORE2I7 | m_GENERIC,
1967 /* X86_TUNE_USE_HIMODE_FIOP */
1968 m_386 | m_486 | m_K6_GEODE,
1970 /* X86_TUNE_USE_SIMODE_FIOP */
1971 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1973 /* X86_TUNE_USE_MOV0 */
1976 /* X86_TUNE_USE_CLTD */
1977 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1979 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1982 /* X86_TUNE_SPLIT_LONG_MOVES */
1985 /* X86_TUNE_READ_MODIFY_WRITE */
1988 /* X86_TUNE_READ_MODIFY */
1991 /* X86_TUNE_PROMOTE_QIMODE */
1992 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1994 /* X86_TUNE_FAST_PREFIX */
1995 ~(m_386 | m_486 | m_PENT),
1997 /* X86_TUNE_SINGLE_STRINGOP */
1998 m_386 | m_P4_NOCONA,
2000 /* X86_TUNE_QIMODE_MATH */
2003 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2004 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
2005 might be considered for Generic32 if our scheme for avoiding partial
2006 stalls were more effective. */
2009 /* X86_TUNE_PROMOTE_QI_REGS */
2012 /* X86_TUNE_PROMOTE_HI_REGS */
2015 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2016 over esp addition. */
2017 m_386 | m_486 | m_PENT | m_PPRO,
2019 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2020 over esp addition. */
2023 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2024 over esp subtraction. */
2025 m_386 | m_486 | m_PENT | m_K6_GEODE,
2027 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
2028 over esp subtraction. */
2029 m_PENT | m_K6_GEODE,
2031 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2032 for DFmode copies */
2033 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2035 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2036 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2038 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2039 conflict here between PPro/Pentium4 based chips that treat 128bit
2040 SSE registers as single units versus K8 based chips that divide SSE
2041 registers into two 64bit halves. This knob promotes all store destinations
2042 to be 128bit to allow register renaming on 128bit SSE units, but usually
2043 results in one extra microop on 64bit SSE units. Experimental results
2044 show that disabling this option on P4 brings over a 20% SPECfp regression,
2045 while enabling it on K8 brings roughly a 2.4% regression that can be partly
2046 masked by careful scheduling of moves. */
2047 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2049 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2050 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2052 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2055 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2058 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2059 are resolved on SSE register parts instead of whole registers, so we may
2060 maintain just the lower part of scalar values in the proper format, leaving
2061 the upper part undefined. */
2064 /* X86_TUNE_SSE_TYPELESS_STORES */
2067 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2068 m_PPRO | m_P4_NOCONA,
2070 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2071 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2073 /* X86_TUNE_PROLOGUE_USING_MOVE */
2074 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2076 /* X86_TUNE_EPILOGUE_USING_MOVE */
2077 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2079 /* X86_TUNE_SHIFT1 */
2082 /* X86_TUNE_USE_FFREEP */
2085 /* X86_TUNE_INTER_UNIT_MOVES */
2086 ~(m_AMD_MULTIPLE | m_GENERIC),
2088 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2089 ~(m_AMDFAM10 | m_BDVER ),
2091 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2092 than 4 branch instructions in the 16 byte window. */
2093 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2095 /* X86_TUNE_SCHEDULE */
2096 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2098 /* X86_TUNE_USE_BT */
2099 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2101 /* X86_TUNE_USE_INCDEC */
2102 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2104 /* X86_TUNE_PAD_RETURNS */
2105 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2107 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2110 /* X86_TUNE_EXT_80387_CONSTANTS */
2111 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2113 /* X86_TUNE_SHORTEN_X87_SSE */
2116 /* X86_TUNE_AVOID_VECTOR_DECODE */
2117 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2119 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2120 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
2123 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is
2124 vector path on AMD machines. */
2125 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2127 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is vector path on AMD machines. */
2129 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2131 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR than a move. */
2135 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2136 but one byte longer. */
2139 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
2140 operand that cannot be represented using a modRM byte. The XOR
2141 replacement is long decoded, so this split helps here as well. */
2144 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion from FP to FP. */
2146 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2148 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2149 from integer to FP. */
2152 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2153 with a subsequent conditional jump instruction into a single
2154 compare-and-branch uop. */
2157 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2158 will impact LEA instruction selection. */
2161 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector instructions. */
2165 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2166 at -O3. For the moment, the prefetching seems badly tuned for Intel chips. */
2168 m_K6_GEODE | m_AMD_MULTIPLE,
2170 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2171 the auto-vectorizer. */
2174 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2175 during reassociation of integer computation. */
2178 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2179 during reassociation of fp computation. */
2183 /* Feature tests against the various architecture variations. */
2184 unsigned char ix86_arch_features[X86_ARCH_LAST];
2186 /* Feature tests against the various architecture variations, used to create
2187 ix86_arch_features based on the processor mask. */
2188 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2189 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2190 ~(m_386 | m_486 | m_PENT | m_K6),
2192 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2195 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2198 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2201 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2205 static const unsigned int x86_accumulate_outgoing_args
2206 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2208 static const unsigned int x86_arch_always_fancy_math_387
2209 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2211 static const unsigned int x86_avx256_split_unaligned_load
2212 = m_COREI7 | m_GENERIC;
2214 static const unsigned int x86_avx256_split_unaligned_store
2215 = m_COREI7 | m_BDVER | m_GENERIC;
2217 /* In case the average insn count for a single function invocation is
2218 lower than this constant, emit fast (but longer) prologue and epilogue code. */
2220 #define FAST_PROLOGUE_INSN_COUNT 20
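/* Illustrative sketch (hypothetical helper, not part of the original
   file): the heuristic gated by FAST_PROLOGUE_INSN_COUNT selects the
   faster but longer prologue/epilogue form only for short functions.  */

static inline bool
example_use_fast_prologue (int insn_count)
{
  /* Below the threshold, the extra code size is assumed to pay off.  */
  return insn_count < FAST_PROLOGUE_INSN_COUNT;
}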
2222 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2223 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2224 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2225 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2227 /* Array of the smallest class containing reg number REGNO, indexed by
2228 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2230 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2232 /* ax, dx, cx, bx */
2233 AREG, DREG, CREG, BREG,
2234 /* si, di, bp, sp */
2235 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2237 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2238 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2241 /* flags, fpsr, fpcr, frame */
2242 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2244 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2247 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2250 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2251 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2252 /* SSE REX registers */
2253 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2257 /* The "default" register map used in 32bit mode. */
2259 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2261 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2262 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2263 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2264 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2265 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2270 /* The "default" register map used in 64bit mode. */
2272 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2274 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2275 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2276 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2277 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2278 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2279 8,9,10,11,12,13,14,15, /* extended integer registers */
2280 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2283 /* Define the register numbers to be used in Dwarf debugging information.
2284 The SVR4 reference port C compiler uses the following register numbers
2285 in its Dwarf output code:
2286 0 for %eax (gcc regno = 0)
2287 1 for %ecx (gcc regno = 2)
2288 2 for %edx (gcc regno = 1)
2289 3 for %ebx (gcc regno = 3)
2290 4 for %esp (gcc regno = 7)
2291 5 for %ebp (gcc regno = 6)
2292 6 for %esi (gcc regno = 4)
2293 7 for %edi (gcc regno = 5)
2294 The following three DWARF register numbers are never generated by
2295 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2296 believes these numbers have these meanings.
2297 8 for %eip (no gcc equivalent)
2298 9 for %eflags (gcc regno = 17)
2299 10 for %trapno (no gcc equivalent)
2300 It is not at all clear how we should number the FP stack registers
2301 for the x86 architecture. If the version of SDB on x86/svr4 were
2302 a bit less brain dead with respect to floating-point then we would
2303 have a precedent to follow with respect to DWARF register numbers
2304 for x86 FP registers, but the SDB on x86/svr4 is so completely
2305 broken with respect to FP registers that it is hardly worth thinking
2306 of it as something to strive for compatibility with.
2307 The version of x86/svr4 SDB I have at the moment does (partially)
2308 seem to believe that DWARF register number 11 is associated with
2309 the x86 register %st(0), but that's about all. Higher DWARF
2310 register numbers don't seem to be associated with anything in
2311 particular, and even for DWARF regno 11, SDB only seems to under-
2312 stand that it should say that a variable lives in %st(0) (when
2313 asked via an `=' command) if we said it was in DWARF regno 11,
2314 but SDB still prints garbage when asked for the value of the
2315 variable in question (via a `/' command).
2316 (Also note that the labels SDB prints for various FP stack regs
2317 when doing an `x' command are all wrong.)
2318 Note that these problems generally don't affect the native SVR4
2319 C compiler because it doesn't allow the use of -O with -g and
2320 because when it is *not* optimizing, it allocates a memory
2321 location for each floating-point variable, and the memory
2322 location is what gets described in the DWARF AT_location
2323 attribute for the variable in question.
2324 Regardless of the severe mental illness of the x86/svr4 SDB, we
2325 do something sensible here and we use the following DWARF
2326 register numbers. Note that these are all stack-top-relative numbers:
2328 11 for %st(0) (gcc regno = 8)
2329 12 for %st(1) (gcc regno = 9)
2330 13 for %st(2) (gcc regno = 10)
2331 14 for %st(3) (gcc regno = 11)
2332 15 for %st(4) (gcc regno = 12)
2333 16 for %st(5) (gcc regno = 13)
2334 17 for %st(6) (gcc regno = 14)
2335 18 for %st(7) (gcc regno = 15)
2337 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2339 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2340 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2341 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2342 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2343 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2344 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2345 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2348 /* Define parameter passing and return registers. */
2350 static int const x86_64_int_parameter_registers[6] =
2352 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2355 static int const x86_64_ms_abi_int_parameter_registers[4] =
2357 CX_REG, DX_REG, R8_REG, R9_REG
2360 static int const x86_64_int_return_registers[4] =
2362 AX_REG, DX_REG, DI_REG, SI_REG
2365 /* Define the structure for the machine field in struct function. */
2367 struct GTY(()) stack_local_entry {
2368 unsigned short mode;
2371 struct stack_local_entry *next;
2374 /* Structure describing stack frame layout.
2375 Stack grows downward:
2381 saved static chain if ix86_static_chain_on_stack
2383 saved frame pointer if frame_pointer_needed
2384 <- HARD_FRAME_POINTER
2390 <- sse_regs_save_offset
2393 [va_arg registers] |
2397 [padding2] | = to_allocate
2406 int outgoing_arguments_size;
2407 HOST_WIDE_INT frame;
2409 /* The offsets relative to ARG_POINTER. */
2410 HOST_WIDE_INT frame_pointer_offset;
2411 HOST_WIDE_INT hard_frame_pointer_offset;
2412 HOST_WIDE_INT stack_pointer_offset;
2413 HOST_WIDE_INT hfp_save_offset;
2414 HOST_WIDE_INT reg_save_offset;
2415 HOST_WIDE_INT sse_reg_save_offset;
2417 /* When save_regs_using_mov is set, emit prologue using
2418 move instead of push instructions. */
2419 bool save_regs_using_mov;
2422 /* Which cpu are we scheduling for. */
2423 enum attr_cpu ix86_schedule;
2425 /* Which cpu are we optimizing for. */
2426 enum processor_type ix86_tune;
2428 /* Which instruction set architecture to use. */
2429 enum processor_type ix86_arch;
2431 /* True if processor has SSE prefetch instruction. */
2432 int x86_prefetch_sse;
2434 /* True if processor has prefetchw instruction. */
2437 /* -mstackrealign option */
2438 static const char ix86_force_align_arg_pointer_string[]
2439 = "force_align_arg_pointer";
2441 static rtx (*ix86_gen_leave) (void);
2442 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2443 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2444 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2445 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2446 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2447 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2448 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2449 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2450 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2452 /* Preferred alignment for stack boundary in bits. */
2453 unsigned int ix86_preferred_stack_boundary;
2455 /* Alignment for incoming stack boundary in bits specified at command line. */
2457 static unsigned int ix86_user_incoming_stack_boundary;
2459 /* Default alignment for incoming stack boundary in bits. */
2460 static unsigned int ix86_default_incoming_stack_boundary;
2462 /* Alignment for incoming stack boundary in bits. */
2463 unsigned int ix86_incoming_stack_boundary;
2465 /* Calling ABI specific va_list type nodes. */
2466 static GTY(()) tree sysv_va_list_type_node;
2467 static GTY(()) tree ms_va_list_type_node;
2469 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2470 char internal_label_prefix[16];
2471 int internal_label_prefix_len;
2473 /* Fence to use after loop using movnt. */
2476 /* Register class used for passing a given 64bit part of the argument.
2477 These represent classes as documented by the PS ABI, with the exception
2478 of the SSESF and SSEDF classes, which are basically the SSE class; gcc will
2479 use an SF or DFmode move instead of DImode to avoid reformatting penalties.
2481 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2482 whenever possible (the upper half does contain padding). */
2483 enum x86_64_reg_class
2486 X86_64_INTEGER_CLASS,
2487 X86_64_INTEGERSI_CLASS,
2494 X86_64_COMPLEX_X87_CLASS,
2498 #define MAX_CLASSES 4
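/* Worked example (illustrative only, not from the original file): a
   structure such as `struct { double d; int i; }' occupies two
   eightbytes.  The first classifies as SSE (SSEDF, so a DFmode move is
   used) and the second as X86_64_INTEGERSI_CLASS (its upper half is
   padding), so the argument travels in one SSE register and one
   integer register.  */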
2500 /* Table of constants used by fldpi, fldln2, etc.... */
2501 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2502 static bool ext_80387_constants_init = 0;
2505 static struct machine_function * ix86_init_machine_status (void);
2506 static rtx ix86_function_value (const_tree, const_tree, bool);
2507 static bool ix86_function_value_regno_p (const unsigned int);
2508 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2510 static rtx ix86_static_chain (const_tree, bool);
2511 static int ix86_function_regparm (const_tree, const_tree);
2512 static void ix86_compute_frame_layout (struct ix86_frame *);
2513 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2515 static void ix86_add_new_builtins (HOST_WIDE_INT);
2516 static tree ix86_canonical_va_list_type (tree);
2517 static void predict_jump (int);
2518 static unsigned int split_stack_prologue_scratch_regno (void);
2519 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2521 enum ix86_function_specific_strings
2523 IX86_FUNCTION_SPECIFIC_ARCH,
2524 IX86_FUNCTION_SPECIFIC_TUNE,
2525 IX86_FUNCTION_SPECIFIC_MAX
2528 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2529 const char *, enum fpmath_unit, bool);
2530 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2531 static void ix86_function_specific_save (struct cl_target_option *);
2532 static void ix86_function_specific_restore (struct cl_target_option *);
2533 static void ix86_function_specific_print (FILE *, int,
2534 struct cl_target_option *);
2535 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2536 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2537 struct gcc_options *);
2538 static bool ix86_can_inline_p (tree, tree);
2539 static void ix86_set_current_function (tree);
2540 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2542 static enum calling_abi ix86_function_abi (const_tree);
2545 #ifndef SUBTARGET32_DEFAULT_CPU
2546 #define SUBTARGET32_DEFAULT_CPU "i386"
2549 /* The svr4 ABI for the i386 says that records and unions are returned in memory. */
2551 #ifndef DEFAULT_PCC_STRUCT_RETURN
2552 #define DEFAULT_PCC_STRUCT_RETURN 1
2555 /* Whether -mtune= or -march= were specified */
2556 static int ix86_tune_defaulted;
2557 static int ix86_arch_specified;
2559 /* Vectorization library interface and handlers. */
2560 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2562 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2563 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2565 /* Processor target table, indexed by processor number */
2568 const struct processor_costs *cost; /* Processor costs */
2569 const int align_loop; /* Default alignments. */
2570 const int align_loop_max_skip;
2571 const int align_jump;
2572 const int align_jump_max_skip;
2573 const int align_func;
2576 static const struct ptt processor_target_table[PROCESSOR_max] =
2578 {&i386_cost, 4, 3, 4, 3, 4},
2579 {&i486_cost, 16, 15, 16, 15, 16},
2580 {&pentium_cost, 16, 7, 16, 7, 16},
2581 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2582 {&geode_cost, 0, 0, 0, 0, 0},
2583 {&k6_cost, 32, 7, 32, 7, 32},
2584 {&athlon_cost, 16, 7, 16, 7, 16},
2585 {&pentium4_cost, 0, 0, 0, 0, 0},
2586 {&k8_cost, 16, 7, 16, 7, 16},
2587 {&nocona_cost, 0, 0, 0, 0, 0},
2588 /* Core 2 32-bit. */
2589 {&generic32_cost, 16, 10, 16, 10, 16},
2590 /* Core 2 64-bit. */
2591 {&generic64_cost, 16, 10, 16, 10, 16},
2592 /* Core i7 32-bit. */
2593 {&generic32_cost, 16, 10, 16, 10, 16},
2594 /* Core i7 64-bit. */
2595 {&generic64_cost, 16, 10, 16, 10, 16},
2596 {&generic32_cost, 16, 7, 16, 7, 16},
2597 {&generic64_cost, 16, 10, 16, 10, 16},
2598 {&amdfam10_cost, 32, 24, 32, 7, 32},
2599 {&bdver1_cost, 32, 24, 32, 7, 32},
2600 {&bdver2_cost, 32, 24, 32, 7, 32},
2601 {&btver1_cost, 32, 24, 32, 7, 32},
2602 {&atom_cost, 16, 15, 16, 7, 16}
2605 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2635 /* Return true if a red-zone is in use. */
2638 ix86_using_red_zone (void)
2640 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2643 /* Return a string that documents the current -m options. The caller is
2644 responsible for freeing the string. */
2647 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2648 const char *tune, enum fpmath_unit fpmath,
2651 struct ix86_target_opts
2653 const char *option; /* option string */
2654 HOST_WIDE_INT mask; /* isa mask options */
2657 /* This table is ordered so that options like -msse4.2 that imply
2658 preceding options are matched first. */
2659 static struct ix86_target_opts isa_opts[] =
2661 { "-m64", OPTION_MASK_ISA_64BIT },
2662 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2663 { "-mfma", OPTION_MASK_ISA_FMA },
2664 { "-mxop", OPTION_MASK_ISA_XOP },
2665 { "-mlwp", OPTION_MASK_ISA_LWP },
2666 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2667 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2668 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2669 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2670 { "-msse3", OPTION_MASK_ISA_SSE3 },
2671 { "-msse2", OPTION_MASK_ISA_SSE2 },
2672 { "-msse", OPTION_MASK_ISA_SSE },
2673 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2674 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2675 { "-mmmx", OPTION_MASK_ISA_MMX },
2676 { "-mabm", OPTION_MASK_ISA_ABM },
2677 { "-mbmi", OPTION_MASK_ISA_BMI },
2678 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2679 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2680 { "-mtbm", OPTION_MASK_ISA_TBM },
2681 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2682 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2683 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2684 { "-maes", OPTION_MASK_ISA_AES },
2685 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2686 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2687 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2688 { "-mf16c", OPTION_MASK_ISA_F16C },
2692 static struct ix86_target_opts flag_opts[] =
2694 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2695 { "-m80387", MASK_80387 },
2696 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2697 { "-malign-double", MASK_ALIGN_DOUBLE },
2698 { "-mcld", MASK_CLD },
2699 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2700 { "-mieee-fp", MASK_IEEE_FP },
2701 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2702 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2703 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2704 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2705 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2706 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2707 { "-mno-red-zone", MASK_NO_RED_ZONE },
2708 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2709 { "-mrecip", MASK_RECIP },
2710 { "-mrtd", MASK_RTD },
2711 { "-msseregparm", MASK_SSEREGPARM },
2712 { "-mstack-arg-probe", MASK_STACK_PROBE },
2713 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2714 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2715 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2716 { "-mvzeroupper", MASK_VZEROUPPER },
2717 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2718 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2719 { "-mprefer-avx128", MASK_PREFER_AVX128},
2722 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2725 char target_other[40];
2734 memset (opts, '\0', sizeof (opts));
2736 /* Add -march= option. */
2739 opts[num][0] = "-march=";
2740 opts[num++][1] = arch;
2743 /* Add -mtune= option. */
2746 opts[num][0] = "-mtune=";
2747 opts[num++][1] = tune;
2750 /* Pick out the options in isa_opts. */
2751 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2753 if ((isa & isa_opts[i].mask) != 0)
2755 opts[num++][0] = isa_opts[i].option;
2756 isa &= ~ isa_opts[i].mask;
2760 if (isa && add_nl_p)
2762 opts[num++][0] = isa_other;
2763 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2767 /* Add flag options. */
2768 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2770 if ((flags & flag_opts[i].mask) != 0)
2772 opts[num++][0] = flag_opts[i].option;
2773 flags &= ~ flag_opts[i].mask;
2777 if (flags && add_nl_p)
2779 opts[num++][0] = target_other;
2780 sprintf (target_other, "(other flags: %#x)", flags);
2783 /* Add -mfpmath= option. */
2786 opts[num][0] = "-mfpmath=";
2787 switch ((int) fpmath)
2790 opts[num++][1] = "387";
2794 opts[num++][1] = "sse";
2797 case FPMATH_387 | FPMATH_SSE:
2798 opts[num++][1] = "sse+387";
2810 gcc_assert (num < ARRAY_SIZE (opts));
2812 /* Size the string. */
2814 sep_len = (add_nl_p) ? 3 : 1;
2815 for (i = 0; i < num; i++)
2818 for (j = 0; j < 2; j++)
2820 len += strlen (opts[i][j]);
2823 /* Build the string. */
2824 ret = ptr = (char *) xmalloc (len);
2827 for (i = 0; i < num; i++)
2831 for (j = 0; j < 2; j++)
2832 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2839 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2847 for (j = 0; j < 2; j++)
2850 memcpy (ptr, opts[i][j], len2[j]);
2852 line_len += len2[j];
2857 gcc_assert (ret + len >= ptr);
2862 /* Return true if profiling code should be emitted before the
2863 prologue, and false otherwise.
2864 Note: For x86 with "hotfix" it is rejected with sorry (). */
2866 ix86_profile_before_prologue (void)
2868 return flag_fentry != 0;
2871 /* Function that is callable from the debugger to print the current options. */
2874 ix86_debug_options (void)
2876 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2877 ix86_arch_string, ix86_tune_string,
2882 fprintf (stderr, "%s\n\n", opts);
2886 fputs ("<no options>\n\n", stderr);
2891 /* Override various settings based on options. If MAIN_ARGS_P, the
2892 options are from the command line, otherwise they are from attribute(target). */
2896 ix86_option_override_internal (bool main_args_p)
2899 unsigned int ix86_arch_mask, ix86_tune_mask;
2900 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2905 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2906 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2907 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2908 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2909 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2910 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2911 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2912 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2913 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2914 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2915 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2916 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2917 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2918 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2919 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2920 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2921 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2922 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2923 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2924 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2925 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2926 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2927 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2928 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2929 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2930 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2931 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2932 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2933 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2934 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2935 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2936 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2937 #define PTA_PREFETCHW (HOST_WIDE_INT_1 << 32)
2939 /* If this reaches 64, we need to widen the struct pta flags field below. */
2943 const char *const name; /* processor name or nickname. */
2944 const enum processor_type processor;
2945 const enum attr_cpu schedule;
2946 const unsigned HOST_WIDE_INT flags;
2948 const processor_alias_table[] =
2950 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2951 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2952 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2953 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2954 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2955 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2956 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2957 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2958 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2959 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2960 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2961 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2962 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2964 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2966 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2967 PTA_MMX | PTA_SSE | PTA_SSE2},
2968 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2969 PTA_MMX |PTA_SSE | PTA_SSE2},
2970 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2971 PTA_MMX | PTA_SSE | PTA_SSE2},
2972 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2973 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2974 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2975 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2976 | PTA_CX16 | PTA_NO_SAHF},
2977 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2978 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2979 | PTA_SSSE3 | PTA_CX16},
2980 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2981 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2982 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_POPCNT},
2983 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2984 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2985 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2986 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2987 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
2988 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2989 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2990 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2991 | PTA_RDRND | PTA_F16C},
2992 {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
2993 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2994 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
2995 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2996 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
2997 | PTA_FMA | PTA_MOVBE},
2998 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2999 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3000 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3001 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3002 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3003 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3004 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3005 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3006 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3007 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3008 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3009 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3010 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3011 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3012 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3013 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3014 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3015 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3016 {"x86-64", PROCESSOR_K8, CPU_K8,
3017 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3018 {"k8", PROCESSOR_K8, CPU_K8,
3019 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3020 | PTA_SSE2 | PTA_NO_SAHF},
3021 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3022 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3023 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3024 {"opteron", PROCESSOR_K8, CPU_K8,
3025 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3026 | PTA_SSE2 | PTA_NO_SAHF},
3027 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3028 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3029 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3030 {"athlon64", PROCESSOR_K8, CPU_K8,
3031 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3032 | PTA_SSE2 | PTA_NO_SAHF},
3033 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3034 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3035 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3036 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3037 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3038 | PTA_SSE2 | PTA_NO_SAHF},
3039 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3040 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3041 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3042 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3043 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3044 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3045 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3046 PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
3047 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
3048 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3049 | PTA_FMA4 | PTA_XOP | PTA_LWP},
3050 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3051 PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
3052 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
3053 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3054 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3056 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3057 PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
3058 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3059 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3060 0 /* flags are only used for -march switch. */ },
3061 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3062 PTA_64BIT /* flags are only used for -march switch. */ },
3065 /* -mrecip options. */
3068 const char *string; /* option name */
3069 unsigned int mask; /* mask bits to set */
3071 const recip_options[] =
3073 { "all", RECIP_MASK_ALL },
3074 { "none", RECIP_MASK_NONE },
3075 { "div", RECIP_MASK_DIV },
3076 { "sqrt", RECIP_MASK_SQRT },
3077 { "vec-div", RECIP_MASK_VEC_DIV },
3078 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3081 int const pta_size = ARRAY_SIZE (processor_alias_table);
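/* Sketch of how a table such as recip_options is typically scanned
   (illustration only; the variable names `q' and `mask' are assumed,
   and the real parsing code appears later in this function):

     for (i = 0; i < ARRAY_SIZE (recip_options); i++)
       if (! strcmp (q, recip_options[i].string))
         {
           mask = recip_options[i].mask;
           break;
         }
*/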
3083 /* Set up prefix/suffix so the error messages refer to either the command
3084 line argument, or the attribute(target). */
3093 prefix = "option(\"";
3098 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3099 SUBTARGET_OVERRIDE_OPTIONS;
3102 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3103 SUBSUBTARGET_OVERRIDE_OPTIONS;
3107 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3109 /* -fPIC is the default for x86_64. */
3110 if (TARGET_MACHO && TARGET_64BIT)
3113 /* Need to check -mtune=generic first. */
3114 if (ix86_tune_string)
3116 if (!strcmp (ix86_tune_string, "generic")
3117 || !strcmp (ix86_tune_string, "i686")
3118 /* As special support for cross compilers we read -mtune=native
3119 as -mtune=generic. With native compilers we won't see the
3120 -mtune=native, as it was changed by the driver. */
3121 || !strcmp (ix86_tune_string, "native"))
3124 ix86_tune_string = "generic64";
3126 ix86_tune_string = "generic32";
3128 /* If this call is for setting the option attribute, allow the
3129 generic32/generic64 that was previously set. */
3130 else if (!main_args_p
3131 && (!strcmp (ix86_tune_string, "generic32")
3132 || !strcmp (ix86_tune_string, "generic64")))
3134 else if (!strncmp (ix86_tune_string, "generic", 7))
3135 error ("bad value (%s) for %stune=%s %s",
3136 ix86_tune_string, prefix, suffix, sw);
3137 else if (!strcmp (ix86_tune_string, "x86-64"))
3138 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3139 "%stune=k8%s or %stune=generic%s instead as appropriate",
3140 prefix, suffix, prefix, suffix, prefix, suffix);
3144 if (ix86_arch_string)
3145 ix86_tune_string = ix86_arch_string;
3146 if (!ix86_tune_string)
3148 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3149 ix86_tune_defaulted = 1;
3152 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3153 need to use a sensible tune option. */
3154 if (!strcmp (ix86_tune_string, "generic")
3155 || !strcmp (ix86_tune_string, "x86-64")
3156 || !strcmp (ix86_tune_string, "i686"))
3159 ix86_tune_string = "generic64";
3161 ix86_tune_string = "generic32";
3165 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3167 /* rep; movq isn't available in 32-bit code. */
3168 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3169 ix86_stringop_alg = no_stringop;
3172 if (!ix86_arch_string)
3173 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3175 ix86_arch_specified = 1;
3177 if (!global_options_set.x_ix86_abi)
3178 ix86_abi = DEFAULT_ABI;
3180 if (global_options_set.x_ix86_cmodel)
3182 switch (ix86_cmodel)
3187 ix86_cmodel = CM_SMALL_PIC;
3189 error ("code model %qs not supported in the %s bit mode",
3196 ix86_cmodel = CM_MEDIUM_PIC;
3198 error ("code model %qs not supported in the %s bit mode",
3200 else if (TARGET_X32)
3201 error ("code model %qs not supported in x32 mode",
3208 ix86_cmodel = CM_LARGE_PIC;
3210 error ("code model %qs not supported in the %s bit mode",
3212 else if (TARGET_X32)
3213 error ("code model %qs not supported in x32 mode",
3219 error ("code model %s does not support PIC mode", "32");
3221 error ("code model %qs not supported in the %s bit mode",
3228 error ("code model %s does not support PIC mode", "kernel");
3229 ix86_cmodel = CM_32;
3232 error ("code model %qs not supported in the %s bit mode",
3242 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3243 use of rip-relative addressing. This eliminates fixups that
3244 would otherwise be needed if this object is to be placed in a
3245 DLL, and is essentially just as efficient as direct addressing. */
3246 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3247 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3248 else if (TARGET_64BIT)
3249 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3251 ix86_cmodel = CM_32;
3253 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3255 error ("-masm=intel not supported in this configuration");
3256 ix86_asm_dialect = ASM_ATT;
3258 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3259 sorry ("%i-bit mode not compiled in",
3260 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3262 for (i = 0; i < pta_size; i++)
3263 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3265 ix86_schedule = processor_alias_table[i].schedule;
3266 ix86_arch = processor_alias_table[i].processor;
3267 /* Default cpu tuning to the architecture. */
3268 ix86_tune = ix86_arch;
3270 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3271 error ("CPU you selected does not support x86-64 "
3274 if (processor_alias_table[i].flags & PTA_MMX
3275 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3276 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3277 if (processor_alias_table[i].flags & PTA_3DNOW
3278 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3279 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3280 if (processor_alias_table[i].flags & PTA_3DNOW_A
3281 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3282 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3283 if (processor_alias_table[i].flags & PTA_SSE
3284 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3285 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3286 if (processor_alias_table[i].flags & PTA_SSE2
3287 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3288 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3289 if (processor_alias_table[i].flags & PTA_SSE3
3290 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3291 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3292 if (processor_alias_table[i].flags & PTA_SSSE3
3293 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3294 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3295 if (processor_alias_table[i].flags & PTA_SSE4_1
3296 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3297 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3298 if (processor_alias_table[i].flags & PTA_SSE4_2
3299 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3300 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3301 if (processor_alias_table[i].flags & PTA_AVX
3302 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3303 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3304 if (processor_alias_table[i].flags & PTA_AVX2
3305 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3306 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3307 if (processor_alias_table[i].flags & PTA_FMA
3308 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3309 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3310 if (processor_alias_table[i].flags & PTA_SSE4A
3311 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3312 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3313 if (processor_alias_table[i].flags & PTA_FMA4
3314 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3315 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3316 if (processor_alias_table[i].flags & PTA_XOP
3317 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3318 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3319 if (processor_alias_table[i].flags & PTA_LWP
3320 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3321 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3322 if (processor_alias_table[i].flags & PTA_ABM
3323 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3324 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3325 if (processor_alias_table[i].flags & PTA_BMI
3326 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3327 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3328 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3329 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3330 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3331 if (processor_alias_table[i].flags & PTA_TBM
3332 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3333 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3334 if (processor_alias_table[i].flags & PTA_BMI2
3335 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3336 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3337 if (processor_alias_table[i].flags & PTA_CX16
3338 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3339 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3340 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3341 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3342 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3343 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3344 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3345 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3346 if (processor_alias_table[i].flags & PTA_MOVBE
3347 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3348 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3349 if (processor_alias_table[i].flags & PTA_AES
3350 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3351 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3352 if (processor_alias_table[i].flags & PTA_PCLMUL
3353 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3354 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3355 if (processor_alias_table[i].flags & PTA_FSGSBASE
3356 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3357 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3358 if (processor_alias_table[i].flags & PTA_RDRND
3359 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3360 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3361 if (processor_alias_table[i].flags & PTA_F16C
3362 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3363 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3364 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3365 x86_prefetch_sse = true;
3366 if (processor_alias_table[i].flags & PTA_PREFETCHW)
3367 x86_prefetchw = true;
3372 if (!strcmp (ix86_arch_string, "generic"))
3373 error ("generic CPU can be used only for %stune=%s %s",
3374 prefix, suffix, sw);
3375 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3376 error ("bad value (%s) for %sarch=%s %s",
3377 ix86_arch_string, prefix, suffix, sw);
3379 ix86_arch_mask = 1u << ix86_arch;
3380 for (i = 0; i < X86_ARCH_LAST; ++i)
3381 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3383 for (i = 0; i < pta_size; i++)
3384 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3386 ix86_schedule = processor_alias_table[i].schedule;
3387 ix86_tune = processor_alias_table[i].processor;
3390 if (!(processor_alias_table[i].flags & PTA_64BIT))
3392 if (ix86_tune_defaulted)
3394 ix86_tune_string = "x86-64";
3395 for (i = 0; i < pta_size; i++)
3396 if (! strcmp (ix86_tune_string,
3397 processor_alias_table[i].name))
3399 ix86_schedule = processor_alias_table[i].schedule;
3400 ix86_tune = processor_alias_table[i].processor;
3403 error ("CPU you selected does not support x86-64 "
3409 /* Adjust tuning when compiling for 32-bit ABI. */
3412 case PROCESSOR_GENERIC64:
3413 ix86_tune = PROCESSOR_GENERIC32;
3414 ix86_schedule = CPU_PENTIUMPRO;
3417 case PROCESSOR_CORE2_64:
3418 ix86_tune = PROCESSOR_CORE2_32;
3421 case PROCESSOR_COREI7_64:
3422 ix86_tune = PROCESSOR_COREI7_32;
3429 /* Intel CPUs have always interpreted SSE prefetch instructions as
3430 NOPs; so we can enable SSE prefetch instructions even when
3431 -mtune (rather than -march) points us to a processor that has them.
3432 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3433 higher processors. */
3435 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3436 x86_prefetch_sse = true;
3440 if (ix86_tune_specified && i == pta_size)
3441 error ("bad value (%s) for %stune=%s %s",
3442 ix86_tune_string, prefix, suffix, sw);
3444 ix86_tune_mask = 1u << ix86_tune;
3445 for (i = 0; i < X86_TUNE_LAST; ++i)
3446 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3448 #ifndef USE_IX86_FRAME_POINTER
3449 #define USE_IX86_FRAME_POINTER 0
3452 #ifndef USE_X86_64_FRAME_POINTER
3453 #define USE_X86_64_FRAME_POINTER 0
3456 /* Set the default values for switches whose default depends on TARGET_64BIT
3457 in case they weren't overridden by command-line options. */
3460 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3461 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3462 if (flag_asynchronous_unwind_tables == 2)
3463 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3464 if (flag_pcc_struct_return == 2)
3465 flag_pcc_struct_return = 0;
3469 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3470 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3471 if (flag_asynchronous_unwind_tables == 2)
3472 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3473 if (flag_pcc_struct_return == 2)
3474 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3478 ix86_cost = &ix86_size_cost;
3480 ix86_cost = processor_target_table[ix86_tune].cost;
3482 /* Arrange to set up i386_stack_locals for all functions. */
3483 init_machine_status = ix86_init_machine_status;
3485 /* Validate -mregparm= value. */
3486 if (global_options_set.x_ix86_regparm)
3489 warning (0, "-mregparm is ignored in 64-bit mode");
3490 if (ix86_regparm > REGPARM_MAX)
3492 error ("-mregparm=%d is not between 0 and %d",
3493 ix86_regparm, REGPARM_MAX);
3498 ix86_regparm = REGPARM_MAX;
3500 /* Default align_* from the processor table. */
3501 if (align_loops == 0)
3503 align_loops = processor_target_table[ix86_tune].align_loop;
3504 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3506 if (align_jumps == 0)
3508 align_jumps = processor_target_table[ix86_tune].align_jump;
3509 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3511 if (align_functions == 0)
3513 align_functions = processor_target_table[ix86_tune].align_func;
3516 /* Provide default for -mbranch-cost= value. */
3517 if (!global_options_set.x_ix86_branch_cost)
3518 ix86_branch_cost = ix86_cost->branch_cost;
3522 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3524 /* Enable by default the SSE and MMX builtins. Do allow the user to
3525 explicitly disable any of these. In particular, disabling SSE and
3526 MMX for kernel code is extremely useful. */
3527 if (!ix86_arch_specified)
3529 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3530 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3533 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3537 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3539 if (!ix86_arch_specified)
3541 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3543 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3544 when the programmer takes care to keep the stack from being destroyed. */
3545 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3546 target_flags |= MASK_NO_RED_ZONE;
3549 /* Keep nonleaf frame pointers. */
3550 if (flag_omit_frame_pointer)
3551 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3552 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3553 flag_omit_frame_pointer = 1;
3555 /* If we're doing fast math, we don't care about comparison order
3556 wrt NaNs. This lets us use a shorter comparison sequence. */
3557 if (flag_finite_math_only)
3558 target_flags &= ~MASK_IEEE_FP;
3560 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3561 since the insns won't need emulation. */
3562 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3563 target_flags &= ~MASK_NO_FANCY_MATH_387;
3565 /* Likewise, if the target doesn't have a 387, or we've specified
3566 software floating point, don't use 387 inline intrinsics. */
3568 target_flags |= MASK_NO_FANCY_MATH_387;
3570 /* Turn on MMX builtins for -msse. */
3573 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3574 x86_prefetch_sse = true;
3577 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3578 if (TARGET_SSE4_2 || TARGET_ABM)
3579 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3581 /* Turn on lzcnt instruction for -mabm. */
3583 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3585 /* Validate -mpreferred-stack-boundary= value or default it to
3586 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3587 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3588 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3590 int min = (TARGET_64BIT ? 4 : 2);
3591 int max = (TARGET_SEH ? 4 : 12);
3593 if (ix86_preferred_stack_boundary_arg < min
3594 || ix86_preferred_stack_boundary_arg > max)
3597 error ("-mpreferred-stack-boundary is not supported "
3600 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3601 ix86_preferred_stack_boundary_arg, min, max);
3604 ix86_preferred_stack_boundary
3605 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
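/* Worked example (illustrative): "-mpreferred-stack-boundary=4" yields
   ix86_preferred_stack_boundary = (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128
   bits, i.e. a 16-byte-aligned stack, matching the x86-64 ABI default.  */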
3608 /* Set the default value for -mstackrealign. */
3609 if (ix86_force_align_arg_pointer == -1)
3610 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3612 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3614 /* Validate -mincoming-stack-boundary= value or default it to
3615 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3616 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3617 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3619 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3620 || ix86_incoming_stack_boundary_arg > 12)
3621 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3622 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3625 ix86_user_incoming_stack_boundary
3626 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3627 ix86_incoming_stack_boundary
3628 = ix86_user_incoming_stack_boundary;
3632 /* Accept -msseregparm only if at least SSE support is enabled. */
3633 if (TARGET_SSEREGPARM
3635 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3637 if (global_options_set.x_ix86_fpmath)
3639 if (ix86_fpmath & FPMATH_SSE)
3643 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3644 ix86_fpmath = FPMATH_387;
3646 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3648 warning (0, "387 instruction set disabled, using SSE arithmetics");
3649 ix86_fpmath = FPMATH_SSE;
3654 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3656 /* If the i387 is disabled, then do not return values in it. */
3658 target_flags &= ~MASK_FLOAT_RETURNS;
3660 /* Use external vectorized library in vectorizing intrinsics. */
3661 if (global_options_set.x_ix86_veclibabi_type)
3662 switch (ix86_veclibabi_type)
3664 case ix86_veclibabi_type_svml:
3665 ix86_veclib_handler = ix86_veclibabi_svml;
3668 case ix86_veclibabi_type_acml:
3669 ix86_veclib_handler = ix86_veclibabi_acml;
3676 if ((!USE_IX86_FRAME_POINTER
3677 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3678 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3680 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3682 /* ??? Unwind info is not correct around the CFG unless either a frame
3683 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3684 unwind info generation to be aware of the CFG and propagating states around edges. */
3686 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3687 || flag_exceptions || flag_non_call_exceptions)
3688 && flag_omit_frame_pointer
3689 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3691 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3692 warning (0, "unwind tables currently require either a frame pointer "
3693 "or %saccumulate-outgoing-args%s for correctness",
3695 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3698 /* If stack probes are required, the space used for large function
3699 arguments on the stack must also be probed, so enable
3700 -maccumulate-outgoing-args so this happens in the prologue. */
3701 if (TARGET_STACK_PROBE
3702 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3704 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3705 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3706 "for correctness", prefix, suffix);
3707 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3710 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3713 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3714 p = strchr (internal_label_prefix, 'X');
3715 internal_label_prefix_len = p - internal_label_prefix;
3719 /* When the scheduling description is not available, disable the scheduler
3720 pass so it won't slow down compilation and make x87 code slower. */
3721 if (!TARGET_SCHEDULE)
3722 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3724 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3725 ix86_cost->simultaneous_prefetches,
3726 global_options.x_param_values,
3727 global_options_set.x_param_values);
3728 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3729 global_options.x_param_values,
3730 global_options_set.x_param_values);
3731 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3732 global_options.x_param_values,
3733 global_options_set.x_param_values);
3734 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3735 global_options.x_param_values,
3736 global_options_set.x_param_values);
3738 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3739 if (flag_prefetch_loop_arrays < 0
3742 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3743 flag_prefetch_loop_arrays = 1;
3745 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3746 can be optimized to ap = __builtin_next_arg (0). */
3747 if (!TARGET_64BIT && !flag_split_stack)
3748 targetm.expand_builtin_va_start = NULL;
3752 ix86_gen_leave = gen_leave_rex64;
3753 ix86_gen_add3 = gen_adddi3;
3754 ix86_gen_sub3 = gen_subdi3;
3755 ix86_gen_sub3_carry = gen_subdi3_carry;
3756 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3757 ix86_gen_monitor = gen_sse3_monitor64;
3758 ix86_gen_andsp = gen_anddi3;
3759 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3760 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3761 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3765 ix86_gen_leave = gen_leave;
3766 ix86_gen_add3 = gen_addsi3;
3767 ix86_gen_sub3 = gen_subsi3;
3768 ix86_gen_sub3_carry = gen_subsi3_carry;
3769 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3770 ix86_gen_monitor = gen_sse3_monitor;
3771 ix86_gen_andsp = gen_andsi3;
3772 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3773 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3774 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
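/* Sketch of intended use (illustrative, not from the original source):
   mode-independent code can emit, say,
       emit_insn (ix86_gen_add3 (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (-16)));
   and get an adddi3 pattern in 64-bit mode or addsi3 in 32-bit mode
   without testing TARGET_64BIT at every call site.  */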
3778 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3780 target_flags |= MASK_CLD & ~target_flags_explicit;
3783 if (!TARGET_64BIT && flag_pic)
3785 if (flag_fentry > 0)
3786 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3790 else if (TARGET_SEH)
3792 if (flag_fentry == 0)
3793 sorry ("-mno-fentry isn%'t compatible with SEH");
3796 else if (flag_fentry < 0)
3798 #if defined(PROFILE_BEFORE_PROLOGUE)
3807 /* When not optimizing for size, enable the vzeroupper optimization for
3808 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3809 AVX unaligned load/store. */
3812 if (flag_expensive_optimizations
3813 && !(target_flags_explicit & MASK_VZEROUPPER))
3814 target_flags |= MASK_VZEROUPPER;
3815 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3816 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3817 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3818 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3819 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3820 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3821 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3822 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3823 target_flags |= MASK_PREFER_AVX128;
3828 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3829 target_flags &= ~MASK_VZEROUPPER;
3832 if (ix86_recip_name)
3834 char *p = ASTRDUP (ix86_recip_name);
3836 unsigned int mask, i;
3839 while ((q = strtok (p, ",")) != NULL)
3850 if (!strcmp (q, "default"))
3851 mask = RECIP_MASK_ALL;
3854 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
3855 if (!strcmp (q, recip_options[i].string))
3857 mask = recip_options[i].mask;
3861 if (i == ARRAY_SIZE (recip_options))
3863 error ("unknown option for -mrecip=%s", q);
3865 mask = RECIP_MASK_NONE;
3869 recip_mask_explicit |= mask;
3871 recip_mask &= ~mask;
3878 recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
3879 else if (target_flags_explicit & MASK_RECIP)
3880 recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
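/* Example of the option grammar handled above (illustrative): assuming the
   usual -mrecip syntax, "-mrecip=all,!sqrt" first sets every RECIP_MASK_*
   bit via the "all"/"default" path, and the "!"-prefixed token then clears
   the sqrt bits through the recip_mask &= ~mask branch, leaving the rest
   of RECIP_MASK_ALL enabled.  */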
3882 /* Save the initial options in case the user does function specific
3885 target_option_default_node = target_option_current_node
3886 = build_target_option_node ();
3889 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
3892 function_pass_avx256_p (const_rtx val)
3897 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3900 if (GET_CODE (val) == PARALLEL)
3905 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3907 r = XVECEXP (val, 0, i);
3908 if (GET_CODE (r) == EXPR_LIST
3910 && REG_P (XEXP (r, 0))
3911 && (GET_MODE (XEXP (r, 0)) == OImode
3912 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3920 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3923 ix86_option_override (void)
3925 ix86_option_override_internal (true);
3928 /* Update register usage after having seen the compiler flags. */
3931 ix86_conditional_register_usage (void)
3936 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3938 if (fixed_regs[i] > 1)
3939 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3940 if (call_used_regs[i] > 1)
3941 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3944 /* The PIC register, if it exists, is fixed. */
3945 j = PIC_OFFSET_TABLE_REGNUM;
3946 if (j != INVALID_REGNUM)
3947 fixed_regs[j] = call_used_regs[j] = 1;
3949 /* The 64-bit MS_ABI changes the set of call-used registers. */
3950 if (TARGET_64BIT_MS_ABI)
3952 call_used_regs[SI_REG] = 0;
3953 call_used_regs[DI_REG] = 0;
3954 call_used_regs[XMM6_REG] = 0;
3955 call_used_regs[XMM7_REG] = 0;
3956 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3957 call_used_regs[i] = 0;
3960 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3961 other call-clobbered regs for 64-bit. */
3964 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3966 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3967 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3968 && call_used_regs[i])
3969 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3972 /* If MMX is disabled, squash the registers. */
3974 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3975 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3976 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3978 /* If SSE is disabled, squash the registers. */
3980 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3981 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3982 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3984 /* If the FPU is disabled, squash the registers. */
3985 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3986 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3987 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3988 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3990 /* If 32-bit, squash the 64-bit registers. */
3993 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3995 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4001 /* Save the current options */
4004 ix86_function_specific_save (struct cl_target_option *ptr)
4006 ptr->arch = ix86_arch;
4007 ptr->schedule = ix86_schedule;
4008 ptr->tune = ix86_tune;
4009 ptr->branch_cost = ix86_branch_cost;
4010 ptr->tune_defaulted = ix86_tune_defaulted;
4011 ptr->arch_specified = ix86_arch_specified;
4012 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4013 ptr->ix86_target_flags_explicit = target_flags_explicit;
4014 ptr->x_recip_mask_explicit = recip_mask_explicit;
4016 /* The fields are char but the variables are not; make sure the
4017 values fit in the fields. */
4018 gcc_assert (ptr->arch == ix86_arch);
4019 gcc_assert (ptr->schedule == ix86_schedule);
4020 gcc_assert (ptr->tune == ix86_tune);
4021 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4024 /* Restore the current options */
4027 ix86_function_specific_restore (struct cl_target_option *ptr)
4029 enum processor_type old_tune = ix86_tune;
4030 enum processor_type old_arch = ix86_arch;
4031 unsigned int ix86_arch_mask, ix86_tune_mask;
4034 ix86_arch = (enum processor_type) ptr->arch;
4035 ix86_schedule = (enum attr_cpu) ptr->schedule;
4036 ix86_tune = (enum processor_type) ptr->tune;
4037 ix86_branch_cost = ptr->branch_cost;
4038 ix86_tune_defaulted = ptr->tune_defaulted;
4039 ix86_arch_specified = ptr->arch_specified;
4040 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4041 target_flags_explicit = ptr->ix86_target_flags_explicit;
4042 recip_mask_explicit = ptr->x_recip_mask_explicit;
4044 /* Recreate the arch feature tests if the arch changed */
4045 if (old_arch != ix86_arch)
4047 ix86_arch_mask = 1u << ix86_arch;
4048 for (i = 0; i < X86_ARCH_LAST; ++i)
4049 ix86_arch_features[i]
4050 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4053 /* Recreate the tune optimization tests */
4054 if (old_tune != ix86_tune)
4056 ix86_tune_mask = 1u << ix86_tune;
4057 for (i = 0; i < X86_TUNE_LAST; ++i)
4058 ix86_tune_features[i]
4059 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4063 /* Print the current options */
4066 ix86_function_specific_print (FILE *file, int indent,
4067 struct cl_target_option *ptr)
4070 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4071 NULL, NULL, ptr->x_ix86_fpmath, false);
4073 fprintf (file, "%*sarch = %d (%s)\n",
4076 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4077 ? cpu_names[ptr->arch]
4080 fprintf (file, "%*stune = %d (%s)\n",
4083 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4084 ? cpu_names[ptr->tune]
4087 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4091 fprintf (file, "%*s%s\n", indent, "", target_string);
4092 free (target_string);
4097 /* Inner function to process the attribute((target(...))), take an argument and
4098 set the current options from the argument. If we have a list, recursively go over the list. */
4102 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4103 struct gcc_options *enum_opts_set)
4108 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4109 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4110 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4111 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4112 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4128 enum ix86_opt_type type;
4133 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4134 IX86_ATTR_ISA ("abm", OPT_mabm),
4135 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4136 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4137 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4138 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4139 IX86_ATTR_ISA ("aes", OPT_maes),
4140 IX86_ATTR_ISA ("avx", OPT_mavx),
4141 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4142 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4143 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4144 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4145 IX86_ATTR_ISA ("sse", OPT_msse),
4146 IX86_ATTR_ISA ("sse2", OPT_msse2),
4147 IX86_ATTR_ISA ("sse3", OPT_msse3),
4148 IX86_ATTR_ISA ("sse4", OPT_msse4),
4149 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4150 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4151 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4152 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4153 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4154 IX86_ATTR_ISA ("fma", OPT_mfma),
4155 IX86_ATTR_ISA ("xop", OPT_mxop),
4156 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4157 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4158 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4159 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4162 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4164 /* string options */
4165 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4166 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4169 IX86_ATTR_YES ("cld",
4173 IX86_ATTR_NO ("fancy-math-387",
4174 OPT_mfancy_math_387,
4175 MASK_NO_FANCY_MATH_387),
4177 IX86_ATTR_YES ("ieee-fp",
4181 IX86_ATTR_YES ("inline-all-stringops",
4182 OPT_minline_all_stringops,
4183 MASK_INLINE_ALL_STRINGOPS),
4185 IX86_ATTR_YES ("inline-stringops-dynamically",
4186 OPT_minline_stringops_dynamically,
4187 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4189 IX86_ATTR_NO ("align-stringops",
4190 OPT_mno_align_stringops,
4191 MASK_NO_ALIGN_STRINGOPS),
4193 IX86_ATTR_YES ("recip",
4199 /* If this is a list, recurse to get the options. */
4200 if (TREE_CODE (args) == TREE_LIST)
4204 for (; args; args = TREE_CHAIN (args))
4205 if (TREE_VALUE (args)
4206 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4207 p_strings, enum_opts_set))
4213 else if (TREE_CODE (args) != STRING_CST)
4216 /* Handle multiple arguments separated by commas. */
4217 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4219 while (next_optstr && *next_optstr != '\0')
4221 char *p = next_optstr;
4223 char *comma = strchr (next_optstr, ',');
4224 const char *opt_string;
4225 size_t len, opt_len;
4230 enum ix86_opt_type type = ix86_opt_unknown;
4236 len = comma - next_optstr;
4237 next_optstr = comma + 1;
4245 /* Recognize no-xxx. */
4246 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4255 /* Find the option. */
4258 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4260 type = attrs[i].type;
4261 opt_len = attrs[i].len;
4262 if (ch == attrs[i].string[0]
4263 && ((type != ix86_opt_str && type != ix86_opt_enum)
4266 && memcmp (p, attrs[i].string, opt_len) == 0)
4269 mask = attrs[i].mask;
4270 opt_string = attrs[i].string;
4275 /* Process the option. */
4278 error ("attribute(target(\"%s\")) is unknown", orig_p);
4282 else if (type == ix86_opt_isa)
4284 struct cl_decoded_option decoded;
4286 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4287 ix86_handle_option (&global_options, &global_options_set,
4288 &decoded, input_location);
4291 else if (type == ix86_opt_yes || type == ix86_opt_no)
4293 if (type == ix86_opt_no)
4294 opt_set_p = !opt_set_p;
4297 target_flags |= mask;
4299 target_flags &= ~mask;
4302 else if (type == ix86_opt_str)
4306 error ("option(\"%s\") was already specified", opt_string);
4310 p_strings[opt] = xstrdup (p + opt_len);
4313 else if (type == ix86_opt_enum)
4318 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4320 set_option (&global_options, enum_opts_set, opt, value,
4321 p + opt_len, DK_UNSPECIFIED, input_location,
4325 error ("attribute(target(\"%s\")) is unknown", orig_p);
4337 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4340 ix86_valid_target_attribute_tree (tree args)
4342 const char *orig_arch_string = ix86_arch_string;
4343 const char *orig_tune_string = ix86_tune_string;
4344 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4345 int orig_tune_defaulted = ix86_tune_defaulted;
4346 int orig_arch_specified = ix86_arch_specified;
4347 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4350 struct cl_target_option *def
4351 = TREE_TARGET_OPTION (target_option_default_node);
4352 struct gcc_options enum_opts_set;
4354 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4356 /* Process each of the options on the chain. */
4357 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4361 /* If the changed options are different from the default, rerun
4362 ix86_option_override_internal, and then save the options away.
4363 The string options are attribute options, and will be undone
4364 when we copy the save structure. */
4365 if (ix86_isa_flags != def->x_ix86_isa_flags
4366 || target_flags != def->x_target_flags
4367 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4368 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4369 || enum_opts_set.x_ix86_fpmath)
4371 /* If we are using the default tune= or arch=, undo the string assigned,
4372 and use the default. */
4373 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4374 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4375 else if (!orig_arch_specified)
4376 ix86_arch_string = NULL;
4378 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4379 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4380 else if (orig_tune_defaulted)
4381 ix86_tune_string = NULL;
4383 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4384 if (enum_opts_set.x_ix86_fpmath)
4385 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4386 else if (!TARGET_64BIT && TARGET_SSE)
4388 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4389 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4392 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4393 ix86_option_override_internal (false);
4395 /* Add any builtin functions with the new isa if any. */
4396 ix86_add_new_builtins (ix86_isa_flags);
4398 /* Save the current options unless we are validating options for #pragma. */
4400 t = build_target_option_node ();
4402 ix86_arch_string = orig_arch_string;
4403 ix86_tune_string = orig_tune_string;
4404 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4406 /* Free up memory allocated to hold the strings */
4407 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4408 free (option_strings[i]);
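/* Usage sketch (illustrative, hypothetical function name): the machinery
   above backs declarations such as
       int fast_path (void) __attribute__ ((target ("sse4.2,popcnt")));
   where each comma-separated token is matched against the attrs table and
   applied as the corresponding -m option for that one function.  */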
4414 /* Hook to validate attribute((target("string"))). */
4417 ix86_valid_target_attribute_p (tree fndecl,
4418 tree ARG_UNUSED (name),
4420 int ARG_UNUSED (flags))
4422 struct cl_target_option cur_target;
4424 tree old_optimize = build_optimization_node ();
4425 tree new_target, new_optimize;
4426 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4428 /* If the function changed the optimization levels as well as setting target
4429 options, start with the optimizations specified. */
4430 if (func_optimize && func_optimize != old_optimize)
4431 cl_optimization_restore (&global_options,
4432 TREE_OPTIMIZATION (func_optimize));
4434 /* The target attributes may also change some optimization flags, so update
4435 the optimization options if necessary. */
4436 cl_target_option_save (&cur_target, &global_options);
4437 new_target = ix86_valid_target_attribute_tree (args);
4438 new_optimize = build_optimization_node ();
4445 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4447 if (old_optimize != new_optimize)
4448 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4451 cl_target_option_restore (&global_options, &cur_target);
4453 if (old_optimize != new_optimize)
4454 cl_optimization_restore (&global_options,
4455 TREE_OPTIMIZATION (old_optimize));
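/* Illustration for ix86_can_inline_p below (not part of the original
   code): a caller compiled with -mavx may inline a callee marked
   target("sse2"), because the callee's ISA bits are a subset of the
   caller's; the reverse is rejected since the SSE2 caller lacks the
   AVX bits.  */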
4461 /* Hook to determine if one function can safely inline another. */
4464 ix86_can_inline_p (tree caller, tree callee)
4467 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4468 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4470 /* If callee has no option attributes, then it is ok to inline. */
4474 /* If the caller has no option attributes but the callee does, then it is not ok to inline. */
4476 else if (!caller_tree)
4481 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4482 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4484 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4 function
4485 can inline an SSE2 function, but an SSE2 function can't inline an SSE4 function. */
4487 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4488 != callee_opts->x_ix86_isa_flags)
4491 /* See if we have the same non-isa options. */
4492 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4495 /* See if arch, tune, etc. are the same. */
4496 else if (caller_opts->arch != callee_opts->arch)
4499 else if (caller_opts->tune != callee_opts->tune)
4502 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4505 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4516 /* Remember the last target of ix86_set_current_function. */
4517 static GTY(()) tree ix86_previous_fndecl;
4519 /* Establish appropriate back-end context for processing the function
4520 FNDECL. The argument might be NULL to indicate processing at top
4521 level, outside of any function scope. */
4523 ix86_set_current_function (tree fndecl)
4525 /* Only change the context if the function changes. This hook is called
4526 several times in the course of compiling a function, and we don't want to
4527 slow things down too much or call target_reinit when it isn't safe. */
4528 if (fndecl && fndecl != ix86_previous_fndecl)
4530 tree old_tree = (ix86_previous_fndecl
4531 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4534 tree new_tree = (fndecl
4535 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4538 ix86_previous_fndecl = fndecl;
4539 if (old_tree == new_tree)
4544 cl_target_option_restore (&global_options,
4545 TREE_TARGET_OPTION (new_tree));
4551 struct cl_target_option *def
4552 = TREE_TARGET_OPTION (target_option_current_node);
4554 cl_target_option_restore (&global_options, def);
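/* Illustration for ix86_in_large_data_p below: with -mcmodel=medium and
   the default -mlarge-data-threshold of 65536 (an assumption here), a
   1 MB array is placed in .ldata/.lbss while a 4-byte scalar stays in
   the normal small-model sections.  */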
4561 /* Return true if this goes in large data/bss. */
4564 ix86_in_large_data_p (tree exp)
4566 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4569 /* Functions are never large data. */
4570 if (TREE_CODE (exp) == FUNCTION_DECL)
4573 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4575 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4576 if (strcmp (section, ".ldata") == 0
4577 || strcmp (section, ".lbss") == 0)
4583 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4585 /* If this is an incomplete type with size 0, then we can't put it
4586 in data because it might be too big when completed. */
4587 if (!size || size > ix86_section_threshold)
4594 /* Switch to the appropriate section for output of DECL.
4595 DECL is either a `VAR_DECL' node or a constant of some sort.
4596 RELOC indicates whether forming the initial value of DECL requires
4597 link-time relocations. */
4599 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4603 x86_64_elf_select_section (tree decl, int reloc,
4604 unsigned HOST_WIDE_INT align)
4606 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4607 && ix86_in_large_data_p (decl))
4609 const char *sname = NULL;
4610 unsigned int flags = SECTION_WRITE;
4611 switch (categorize_decl_for_section (decl, reloc))
4616 case SECCAT_DATA_REL:
4617 sname = ".ldata.rel";
4619 case SECCAT_DATA_REL_LOCAL:
4620 sname = ".ldata.rel.local";
4622 case SECCAT_DATA_REL_RO:
4623 sname = ".ldata.rel.ro";
4625 case SECCAT_DATA_REL_RO_LOCAL:
4626 sname = ".ldata.rel.ro.local";
4630 flags |= SECTION_BSS;
4633 case SECCAT_RODATA_MERGE_STR:
4634 case SECCAT_RODATA_MERGE_STR_INIT:
4635 case SECCAT_RODATA_MERGE_CONST:
4639 case SECCAT_SRODATA:
4646 /* We don't split these for the medium model. Place them into
4647 default sections and hope for the best. */
4652 /* We might get called with string constants, but get_named_section
4653 doesn't like them as they are not DECLs. Also, we need to set
4654 flags in that case. */
4656 return get_section (sname, flags, NULL);
4657 return get_named_section (decl, sname, reloc);
4660 return default_elf_select_section (decl, reloc, align);
4663 /* Build up a unique section name, expressed as a
4664 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4665 RELOC indicates whether the initial value of EXP requires
4666 link-time relocations. */
4668 static void ATTRIBUTE_UNUSED
4669 x86_64_elf_unique_section (tree decl, int reloc)
4671 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4672 && ix86_in_large_data_p (decl))
4674 const char *prefix = NULL;
4675 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4676 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4678 switch (categorize_decl_for_section (decl, reloc))
4681 case SECCAT_DATA_REL:
4682 case SECCAT_DATA_REL_LOCAL:
4683 case SECCAT_DATA_REL_RO:
4684 case SECCAT_DATA_REL_RO_LOCAL:
4685 prefix = one_only ? ".ld" : ".ldata";
4688 prefix = one_only ? ".lb" : ".lbss";
4691 case SECCAT_RODATA_MERGE_STR:
4692 case SECCAT_RODATA_MERGE_STR_INIT:
4693 case SECCAT_RODATA_MERGE_CONST:
4694 prefix = one_only ? ".lr" : ".lrodata";
4696 case SECCAT_SRODATA:
4703 /* We don't split these for the medium model. Place them into
4704 default sections and hope for the best. */
4709 const char *name, *linkonce;
4712 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4713 name = targetm.strip_name_encoding (name);
4715 /* If we're using one_only, then there needs to be a .gnu.linkonce
4716 prefix to the section name. */
4717 linkonce = one_only ? ".gnu.linkonce" : "";
4719 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4721 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4725 default_unique_section (decl, reloc);
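/* For illustration, with a hypothetical one-only writable variable "foo"
   under the medium model, the ACONCAT above produces the section name
   ".gnu.linkonce.ld.foo"; the non-one-only case yields ".ldata.foo".  */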
4728 #ifdef COMMON_ASM_OP
4729 /* This says how to output assembler code to declare an
4730 uninitialized external linkage data object.
4732 For medium-model x86-64 we need to use the .largecomm opcode for large objects. */
4735 x86_elf_aligned_common (FILE *file,
4736 const char *name, unsigned HOST_WIDE_INT size,
4739 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4740 && size > (unsigned int)ix86_section_threshold)
4741 fputs (".largecomm\t", file);
4743 fputs (COMMON_ASM_OP, file);
4744 assemble_name (file, name);
4745 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4746 size, align / BITS_PER_UNIT);
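/* Sample emitted assembly (illustrative, hypothetical symbol "buf"): for
   a 128 KB object aligned to 32 bytes under the medium model this prints
       .largecomm	buf,131072,32
   while smaller objects fall back to the plain COMMON_ASM_OP form.  */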
4750 /* Utility function for targets to use in implementing
4751 ASM_OUTPUT_ALIGNED_BSS. */
4754 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4755 const char *name, unsigned HOST_WIDE_INT size,
4758 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4759 && size > (unsigned int)ix86_section_threshold)
4760 switch_to_section (get_named_section (decl, ".lbss", 0));
4762 switch_to_section (bss_section);
4763 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4764 #ifdef ASM_DECLARE_OBJECT_NAME
4765 last_assemble_variable_decl = decl;
4766 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4768 /* Standard thing is just output label for the object. */
4769 ASM_OUTPUT_LABEL (file, name);
4770 #endif /* ASM_DECLARE_OBJECT_NAME */
4771 ASM_OUTPUT_SKIP (file, size ? size : 1);
4774 /* Decide whether we must probe the stack before any space allocation
4775 on this target. It's essentially TARGET_STACK_PROBE except when
4776 -fstack-check causes the stack to be already probed differently. */
4779 ix86_target_stack_probe (void)
4781 /* Do not probe the stack twice if static stack checking is enabled. */
4782 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4785 return TARGET_STACK_PROBE;
4788 /* Decide whether we can make a sibling call to a function. DECL is the
4789 declaration of the function being targeted by the call and EXP is the
4790 CALL_EXPR representing the call. */
4793 ix86_function_ok_for_sibcall (tree decl, tree exp)
4795 tree type, decl_or_type;
4798 /* If we are generating position-independent code, we cannot sibcall
4799 optimize any indirect call, or a direct call to a global function,
4800 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4804 && (!decl || !targetm.binds_local_p (decl)))
4807 /* If we need to align the outgoing stack, then sibcalling would
4808 unalign the stack, which may break the called function. */
4809 if (ix86_minimum_incoming_stack_boundary (true)
4810 < PREFERRED_STACK_BOUNDARY)
4815 decl_or_type = decl;
4816 type = TREE_TYPE (decl);
4820 /* We're looking at the CALL_EXPR, we need the type of the function. */
4821 type = CALL_EXPR_FN (exp); /* pointer expression */
4822 type = TREE_TYPE (type); /* pointer type */
4823 type = TREE_TYPE (type); /* function type */
4824 decl_or_type = type;
4827 /* Check that the return value locations are the same. Like
4828 if we are returning floats on the 80387 register stack, we cannot
4829 make a sibcall from a function that doesn't return a float to a
4830 function that does or, conversely, from a function that does return
4831 a float to a function that doesn't; the necessary stack adjustment
4832 would not be executed. This is also the place we notice
4833 differences in the return value ABI. Note that it is ok for one
4834 of the functions to have void return type as long as the return
4835 value of the other is passed in a register. */
4836 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4837 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4839 if (STACK_REG_P (a) || STACK_REG_P (b))
4841 if (!rtx_equal_p (a, b))
4844 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4846 /* Disable sibcall if we need to generate vzeroupper after function return. */
4848 if (TARGET_VZEROUPPER
4849 && cfun->machine->callee_return_avx256_p
4850 && !cfun->machine->caller_return_avx256_p)
4853 else if (!rtx_equal_p (a, b))
4858 /* The SYSV ABI has more call-clobbered registers;
4859 disallow sibcalls from MS to SYSV. */
4860 if (cfun->machine->call_abi == MS_ABI
4861 && ix86_function_type_abi (type) == SYSV_ABI)
4866 /* If this call is indirect, we'll need to be able to use a
4867 call-clobbered register for the address of the target function.
4868 Make sure that all such registers are not used for passing
4869 parameters. Note that DLLIMPORT functions are indirect. */
4871 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4873 if (ix86_function_regparm (type, NULL) >= 3)
4875 /* ??? Need to count the actual number of registers to be used,
4876 not the possible number of registers. Fix later. */
4882 /* Otherwise okay. That also includes certain types of indirect calls. */
4886 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4887 and "sseregparm" calling convention attributes;
4888 arguments as in struct attribute_spec.handler. */
4891 ix86_handle_cconv_attribute (tree *node, tree name,
4893 int flags ATTRIBUTE_UNUSED,
4896 if (TREE_CODE (*node) != FUNCTION_TYPE
4897 && TREE_CODE (*node) != METHOD_TYPE
4898 && TREE_CODE (*node) != FIELD_DECL
4899 && TREE_CODE (*node) != TYPE_DECL)
4901 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4903 *no_add_attrs = true;
4907 /* Can combine regparm with all attributes but fastcall, and thiscall. */
4908 if (is_attribute_p ("regparm", name))
4912 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4914 error ("fastcall and regparm attributes are not compatible");
4917 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4919 error ("regparm and thiscall attributes are not compatible");
4922 cst = TREE_VALUE (args);
4923 if (TREE_CODE (cst) != INTEGER_CST)
4925 warning (OPT_Wattributes,
4926 "%qE attribute requires an integer constant argument",
4928 *no_add_attrs = true;
4930 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4932 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4934 *no_add_attrs = true;
4942 /* Do not warn when emulating the MS ABI. */
4943 if ((TREE_CODE (*node) != FUNCTION_TYPE
4944 && TREE_CODE (*node) != METHOD_TYPE)
4945 || ix86_function_type_abi (*node) != MS_ABI)
4946 warning (OPT_Wattributes, "%qE attribute ignored",
4948 *no_add_attrs = true;
4952 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4953 if (is_attribute_p ("fastcall", name))
4955 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4957 error ("fastcall and cdecl attributes are not compatible");
4959 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4961 error ("fastcall and stdcall attributes are not compatible");
4963 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4965 error ("fastcall and regparm attributes are not compatible");
4967 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4969 error ("fastcall and thiscall attributes are not compatible");
4973 /* Can combine stdcall with fastcall (redundant), regparm and
4975 else if (is_attribute_p ("stdcall", name))
4977 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4979 error ("stdcall and cdecl attributes are not compatible");
4981 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4983 error ("stdcall and fastcall attributes are not compatible");
4985 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4987 error ("stdcall and thiscall attributes are not compatible");
4991 /* Can combine cdecl with regparm and sseregparm. */
4992 else if (is_attribute_p ("cdecl", name))
4994 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4996 error ("stdcall and cdecl attributes are not compatible");
4998 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5000 error ("fastcall and cdecl attributes are not compatible");
5002 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5004 error ("cdecl and thiscall attributes are not compatible");
5007 else if (is_attribute_p ("thiscall", name))
5009 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5010 warning (OPT_Wattributes, "%qE attribute is used for a non-class method",
5012 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5014 error ("stdcall and thiscall attributes are not compatible");
5016 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5018 error ("fastcall and thiscall attributes are not compatible");
5020 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5022 error ("cdecl and thiscall attributes are not compatible");
5026 /* Can combine sseregparm with all attributes. */
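/* Usage sketch (illustrative, hypothetical declarations) of the
   conventions validated above:
       int __attribute__ ((fastcall)) f (int a, int b);     /* ECX, EDX */
       int __attribute__ ((regparm (2))) g (int a, int b);  /* EAX, EDX */
   Combining fastcall and regparm on one declaration is rejected with the
   "fastcall and regparm attributes are not compatible" error above.  */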
5031 /* The transactional memory builtins are implicitly regparm or fastcall
5032 depending on the ABI. Override the generic do-nothing attribute that
5033 these builtins were declared with, and replace it with one of the two
5034 attributes that we expect elsewhere. */
5037 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5038 tree args ATTRIBUTE_UNUSED,
5039 int flags ATTRIBUTE_UNUSED,
5044 /* In no case do we want to add the placeholder attribute. */
5045 *no_add_attrs = true;
5047 /* The 64-bit ABI is unchanged for transactional memory. */
5051 /* ??? Is there a better way to validate 32-bit windows? We have
5052 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5053 if (CHECK_STACK_LIMIT > 0)
5054 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5057 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5058 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5060 decl_attributes (node, alt, flags);
5065 /* This function determines from TYPE the calling-convention. */
5068 ix86_get_callcvt (const_tree type)
5070 unsigned int ret = 0;
5075 return IX86_CALLCVT_CDECL;
5077 attrs = TYPE_ATTRIBUTES (type);
5078 if (attrs != NULL_TREE)
5080 if (lookup_attribute ("cdecl", attrs))
5081 ret |= IX86_CALLCVT_CDECL;
5082 else if (lookup_attribute ("stdcall", attrs))
5083 ret |= IX86_CALLCVT_STDCALL;
5084 else if (lookup_attribute ("fastcall", attrs))
5085 ret |= IX86_CALLCVT_FASTCALL;
5086 else if (lookup_attribute ("thiscall", attrs))
5087 ret |= IX86_CALLCVT_THISCALL;
5089 /* Regparm isn't allowed for thiscall and fastcall. */
5090 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5092 if (lookup_attribute ("regparm", attrs))
5093 ret |= IX86_CALLCVT_REGPARM;
5094 if (lookup_attribute ("sseregparm", attrs))
5095 ret |= IX86_CALLCVT_SSEREGPARM;
5098 if (IX86_BASE_CALLCVT(ret) != 0)
5102 is_stdarg = stdarg_p (type);
5103 if (TARGET_RTD && !is_stdarg)
5104 return IX86_CALLCVT_STDCALL | ret;
5108 || TREE_CODE (type) != METHOD_TYPE
5109 || ix86_function_type_abi (type) != MS_ABI)
5110 return IX86_CALLCVT_CDECL | ret;
5112 return IX86_CALLCVT_THISCALL;
5115 /* Return 0 if the attributes for two types are incompatible, 1 if they
5116 are compatible, and 2 if they are nearly compatible (which causes a
5117 warning to be generated). */
5120 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5122 unsigned int ccvt1, ccvt2;
5124 if (TREE_CODE (type1) != FUNCTION_TYPE
5125 && TREE_CODE (type1) != METHOD_TYPE)
5128 ccvt1 = ix86_get_callcvt (type1);
5129 ccvt2 = ix86_get_callcvt (type2);
5132 if (ix86_function_regparm (type1, NULL)
5133 != ix86_function_regparm (type2, NULL))
5139 /* Return the regparm value for a function with the indicated TYPE and DECL.
5140 DECL may be NULL when calling function indirectly
5141 or considering a libcall. */
5144 ix86_function_regparm (const_tree type, const_tree decl)
5151 return (ix86_function_type_abi (type) == SYSV_ABI
5152 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5153 ccvt = ix86_get_callcvt (type);
5154 regparm = ix86_regparm;
5156 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5158 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5161 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5165 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5167 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5170 /* Use register calling convention for local functions when possible. */
5172 && TREE_CODE (decl) == FUNCTION_DECL
5174 && !(profile_flag && !flag_fentry))
5176 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5177 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5178 if (i && i->local && i->can_change_signature)
5180 int local_regparm, globals = 0, regno;
5182 /* Make sure no regparm register is taken by a
5183 fixed register variable. */
5184 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5185 if (fixed_regs[local_regparm])
5188 /* We don't want to use regparm(3) for nested functions as
5189 these use a static chain pointer in the third argument. */
5190 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5193 /* In 32-bit mode save a register for the split stack. */
5194 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5197 /* Each fixed register usage increases register pressure,
5198 so fewer registers should be used for argument passing.
5199 This functionality can be overridden by an explicit
5201 for (regno = 0; regno <= DI_REG; regno++)
5202 if (fixed_regs[regno])
5206 = globals < local_regparm ? local_regparm - globals : 0;
5208 if (local_regparm > regparm)
5209 regparm = local_regparm;
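/* Worked example (illustrative): with -ffixed-ecx in 32-bit code, the
   scan above stops at ECX, giving local_regparm == 2; the globals loop
   then counts that same fixed register, so the final value is
   2 - 1 == 1, leaving one register for local argument passing.  */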
5216 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5217 DFmode (2) arguments in SSE registers for a function with the
5218 indicated TYPE and DECL. DECL may be NULL when calling function
5219 indirectly or considering a libcall. Otherwise return 0. */
5222 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5224 gcc_assert (!TARGET_64BIT);
5226 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5227 by the sseregparm attribute. */
5228 if (TARGET_SSEREGPARM
5229 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5236 error ("calling %qD with attribute sseregparm without "
5237 "SSE/SSE2 enabled", decl);
5239 error ("calling %qT with attribute sseregparm without "
5240 "SSE/SSE2 enabled", type);
5248 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5249 (and DFmode for SSE2) arguments in SSE registers. */
5250 if (decl && TARGET_SSE_MATH && optimize
5251 && !(profile_flag && !flag_fentry))
5253 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5254 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5255 if (i && i->local && i->can_change_signature)
5256 return TARGET_SSE2 ? 2 : 1;
5262 /* Return true if EAX is live at the start of the function. Used by
5263 ix86_expand_prologue to determine if we need special help before
5264 calling allocate_stack_worker. */
5267 ix86_eax_live_at_start_p (void)
5269 /* Cheat. Don't bother working forward from ix86_function_regparm
5270 to the function type to whether an actual argument is located in
5271 eax. Instead just look at cfg info, which is still close enough
5272 to correct at this point. This gives false positives for broken
5273 functions that might use uninitialized data that happens to be
5274 allocated in eax, but who cares? */
5275 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5279 ix86_keep_aggregate_return_pointer (tree fntype)
5285 attr = lookup_attribute ("callee_pop_aggregate_return",
5286 TYPE_ATTRIBUTES (fntype));
5288 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5290 /* For 32-bit MS-ABI the default is to keep the aggregate return pointer. */
5292 if (ix86_function_type_abi (fntype) == MS_ABI)
5295 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5298 /* Value is the number of bytes of arguments automatically
5299 popped when returning from a subroutine call.
5300 FUNDECL is the declaration node of the function (as a tree),
5301 FUNTYPE is the data type of the function (as a tree),
5302 or for a library call it is an identifier node for the subroutine name.
5303 SIZE is the number of bytes of arguments passed on the stack.
5305 On the 80386, the RTD insn may be used to pop them if the number
5306 of args is fixed, but if the number is variable then the caller
5307 must pop them all. RTD can't be used for library calls now
5308 because the library is compiled with the Unix compiler.
5309 Use of RTD is a selectable option, since it is incompatible with
5310 standard Unix calling sequences. If the option is not selected,
5311 the caller must always pop the args.
5313 The attribute stdcall is equivalent to RTD on a per module basis. */
5316 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5320 /* None of the 64-bit ABIs pop arguments. */
5324 ccvt = ix86_get_callcvt (funtype);
5326 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5327 | IX86_CALLCVT_THISCALL)) != 0
5328 && ! stdarg_p (funtype))
5331 /* Lose any fake structure return argument if it is passed on the stack. */
5332 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5333 && !ix86_keep_aggregate_return_pointer (funtype))
5335 int nregs = ix86_function_regparm (funtype, fundecl);
5337 return GET_MODE_SIZE (Pmode);
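/* Example (illustrative): a 32-bit stdcall function taking three ints
   reaches this hook with SIZE == 12 and is not stdarg, so it pops all
   12 bytes itself ("ret 12"); a plain cdecl function gets 0 here and
   leaves the cleanup to its caller.  */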
5343 /* Argument support functions. */
5345 /* Return true when register may be used to pass function parameters. */
5347 ix86_function_arg_regno_p (int regno)
5350 const int *parm_regs;
5355 return (regno < REGPARM_MAX
5356 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5358 return (regno < REGPARM_MAX
5359 || (TARGET_MMX && MMX_REGNO_P (regno)
5360 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5361 || (TARGET_SSE && SSE_REGNO_P (regno)
5362 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5367 if (SSE_REGNO_P (regno) && TARGET_SSE)
5372 if (TARGET_SSE && SSE_REGNO_P (regno)
5373 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5377 /* TODO: The function should depend on the current function's ABI, but
5378 builtins.c would need updating then. Therefore we use the default ABI. */
5381 /* RAX is used as hidden argument to va_arg functions. */
5382 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5385 if (ix86_abi == MS_ABI)
5386 parm_regs = x86_64_ms_abi_int_parameter_registers;
5388 parm_regs = x86_64_int_parameter_registers;
5389 for (i = 0; i < (ix86_abi == MS_ABI
5390 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5391 if (regno == parm_regs[i])
5396 /* Return true if we do not know how to pass TYPE solely in registers. */
5399 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5401 if (must_pass_in_stack_var_size_or_pad (mode, type))
5404 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5405 The layout_type routine is crafty and tries to trick us into passing
5406 currently unsupported vector types on the stack by using TImode. */
5407 return (!TARGET_64BIT && mode == TImode
5408 && type && TREE_CODE (type) != VECTOR_TYPE);
5411 /* Return the size, in bytes, of the area reserved for arguments passed
5412 in registers for the function represented by FNDECL, depending on the ABI used. */
5415 ix86_reg_parm_stack_space (const_tree fndecl)
5417 enum calling_abi call_abi = SYSV_ABI;
5418 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5419 call_abi = ix86_function_abi (fndecl);
5421 call_abi = ix86_function_type_abi (fndecl);
5422 if (TARGET_64BIT && call_abi == MS_ABI)
5427 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5430 ix86_function_type_abi (const_tree fntype)
5432 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5434 enum calling_abi abi = ix86_abi;
5435 if (abi == SYSV_ABI)
5437 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5440 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5448 ix86_function_ms_hook_prologue (const_tree fn)
5450 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5452 if (decl_function_context (fn) != NULL_TREE)
5453 error_at (DECL_SOURCE_LOCATION (fn),
5454 "ms_hook_prologue is not compatible with nested function");
5461 static enum calling_abi
5462 ix86_function_abi (const_tree fndecl)
5466 return ix86_function_type_abi (TREE_TYPE (fndecl));
5469 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
5472 ix86_cfun_abi (void)
5476 return cfun->machine->call_abi;
5479 /* Write the extra assembler code needed to declare a function properly. */
5482 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5485 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5489 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5490 unsigned int filler_cc = 0xcccccccc;
5492 for (i = 0; i < filler_count; i += 4)
5493 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5496 #ifdef SUBTARGET_ASM_UNWIND_INIT
5497 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5500 ASM_OUTPUT_LABEL (asm_out_file, fname);
5502 /* Output magic byte marker, if hot-patch attribute is set. */
5507 /* leaq [%rsp + 0], %rsp */
5508 asm_fprintf (asm_out_file, ASM_BYTE
5509 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5513 /* movl.s %edi, %edi
5515 movl.s %esp, %ebp */
5516 asm_fprintf (asm_out_file, ASM_BYTE
5517 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5523 extern void init_regs (void);
5525 /* Implementation of the call ABI switching target hook. For FNDECL,
5526 the appropriate call register sets are selected. See also
5527 ix86_conditional_register_usage for more details. */
5529 ix86_call_abi_override (const_tree fndecl)
5531 if (fndecl == NULL_TREE)
5532 cfun->machine->call_abi = ix86_abi;
5534 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5537 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
5538 the expensive re-initialization of init_regs each time we switch function
5539 context, since it is needed only during RTL expansion. */
5541 ix86_maybe_switch_abi (void)
5544 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5548 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5549 for a call to a function whose data type is FNTYPE.
5550 For a library call, FNTYPE is 0. */
5553 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5554 tree fntype, /* tree ptr for function decl */
5555 rtx libname, /* SYMBOL_REF of library name or 0 */
5559 struct cgraph_local_info *i;
5562 memset (cum, 0, sizeof (*cum));
5564 /* Initialize for the current callee. */
5567 cfun->machine->callee_pass_avx256_p = false;
5568 cfun->machine->callee_return_avx256_p = false;
5573 i = cgraph_local_info (fndecl);
5574 cum->call_abi = ix86_function_abi (fndecl);
5575 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5580 cum->call_abi = ix86_function_type_abi (fntype);
5582 fnret_type = TREE_TYPE (fntype);
5587 if (TARGET_VZEROUPPER && fnret_type)
5589 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5591 if (function_pass_avx256_p (fnret_value))
5593 /* The return value of this function uses 256bit AVX modes. */
5596 cfun->machine->callee_return_avx256_p = true;
5597 cum->callee_return_avx256_p = true;
5600 cfun->machine->caller_return_avx256_p = true;
5604 cum->caller = caller;
5606 /* Set up the number of registers to use for passing arguments. */
5608 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5609 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5610 "or subtarget optimization implying it");
5611 cum->nregs = ix86_regparm;
5614 cum->nregs = (cum->call_abi == SYSV_ABI
5615 ? X86_64_REGPARM_MAX
5616 : X86_64_MS_REGPARM_MAX);
5620 cum->sse_nregs = SSE_REGPARM_MAX;
5623 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5624 ? X86_64_SSE_REGPARM_MAX
5625 : X86_64_MS_SSE_REGPARM_MAX);
5629 cum->mmx_nregs = MMX_REGPARM_MAX;
5630 cum->warn_avx = true;
5631 cum->warn_sse = true;
5632 cum->warn_mmx = true;
5634 /* Because types might mismatch between caller and callee, we need to
5635 use the actual type of the function for local calls.
5636 FIXME: cgraph_analyze can be told to actually record whether a function
5637 uses va_start, so for local functions maybe_vaarg can be made more aggressive.
5639 FIXME: once the type system is fixed, we won't need this code anymore. */
5640 if (i && i->local && i->can_change_signature)
5641 fntype = TREE_TYPE (fndecl);
5642 cum->maybe_vaarg = (fntype
5643 ? (!prototype_p (fntype) || stdarg_p (fntype))
5648 /* If there are variable arguments, then we won't pass anything
5649 in registers in 32-bit mode. */
5650 if (stdarg_p (fntype))
5661 /* Use the ecx and edx registers if the function has the fastcall
5662 attribute, else look for regparm information. */
5665 unsigned int ccvt = ix86_get_callcvt (fntype);
5666 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5669 cum->fastcall = 1; /* Same first register as in fastcall. */
5671 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5677 cum->nregs = ix86_function_regparm (fntype, fndecl);
5680 /* Set up the number of SSE registers used for passing SFmode
5681 and DFmode arguments. Warn about a mismatching ABI. */
5682 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5686 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5687 But in the case of vector types, it is some vector mode.
5689 When we have only some of our vector isa extensions enabled, then there
5690 are some modes for which vector_mode_supported_p is false. For these
5691 modes, the generic vector support in gcc will choose some non-vector mode
5692 in order to implement the type. By computing the natural mode, we'll
5693 select the proper ABI location for the operand and not depend on whatever
5694 the middle-end decides to do with these vector types.
5696 The middle-end can't deal with vector types > 16 bytes. In this
5697 case, we return the original mode and warn about the ABI change if CUM isn't
5700 static enum machine_mode
5701 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5703 enum machine_mode mode = TYPE_MODE (type);
5705 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5707 HOST_WIDE_INT size = int_size_in_bytes (type);
5708 if ((size == 8 || size == 16 || size == 32)
5709 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5710 && TYPE_VECTOR_SUBPARTS (type) > 1)
5712 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5714 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5715 mode = MIN_MODE_VECTOR_FLOAT;
5717 mode = MIN_MODE_VECTOR_INT;
5719 /* Get the mode which has this inner mode and number of units. */
5720 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5721 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5722 && GET_MODE_INNER (mode) == innermode)
5724 if (size == 32 && !TARGET_AVX)
5726 static bool warnedavx;
5733 warning (0, "AVX vector argument without AVX "
5734 "enabled changes the ABI");
5736 return TYPE_MODE (type);
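/* For example (illustrative, not from the original source): given

     typedef int v4si __attribute__ ((vector_size (16)));

   type_natural_mode returns V4SImode even when SSE is disabled and the
   middle-end would have laid the type out in a non-vector mode, so the
   argument still gets the ABI-mandated SSE slot; 32-byte vectors
   without AVX take the warning path above instead.  */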
5749 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5750 this may not agree with the mode that the type system has chosen for the
5751 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5752 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5755 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5760 if (orig_mode != BLKmode)
5761 tmp = gen_rtx_REG (orig_mode, regno);
5764 tmp = gen_rtx_REG (mode, regno);
5765 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5766 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5772 /* x86-64 register passing implementation. See the x86-64 ABI for details.
5773 The goal of this code is to classify each 8 bytes of an incoming argument
5774 by register class and assign registers accordingly. */
5776 /* Return the union class of CLASS1 and CLASS2.
5777 See the x86-64 PS ABI for details. */
5779 static enum x86_64_reg_class
5780 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5782 /* Rule #1: If both classes are equal, this is the resulting class. */
5783 if (class1 == class2)
5786 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5788 if (class1 == X86_64_NO_CLASS)
5790 if (class2 == X86_64_NO_CLASS)
5793 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5794 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5795 return X86_64_MEMORY_CLASS;
5797 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5798 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5799 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5800 return X86_64_INTEGERSI_CLASS;
5801 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5802 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5803 return X86_64_INTEGER_CLASS;
5805 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5807 if (class1 == X86_64_X87_CLASS
5808 || class1 == X86_64_X87UP_CLASS
5809 || class1 == X86_64_COMPLEX_X87_CLASS
5810 || class2 == X86_64_X87_CLASS
5811 || class2 == X86_64_X87UP_CLASS
5812 || class2 == X86_64_COMPLEX_X87_CLASS)
5813 return X86_64_MEMORY_CLASS;
5815 /* Rule #6: Otherwise class SSE is used. */
5816 return X86_64_SSE_CLASS;
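/* A few worked applications of these rules (illustrative only):

     merge (X86_64_NO_CLASS, X86_64_SSESF_CLASS)        -> X86_64_SSESF_CLASS     (rule #2)
     merge (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS) -> X86_64_INTEGERSI_CLASS (rule #4)
     merge (X86_64_SSEDF_CLASS, X86_64_X87_CLASS)       -> X86_64_MEMORY_CLASS    (rule #5)
     merge (X86_64_SSESF_CLASS, X86_64_SSEDF_CLASS)     -> X86_64_SSE_CLASS       (rule #6)  */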
5819 /* Classify the argument of type TYPE and mode MODE.
5820 CLASSES will be filled by the register class used to pass each word
5821 of the operand. The number of words is returned. In case the parameter
5822 should be passed in memory, 0 is returned. As a special case for zero
5823 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5825 BIT_OFFSET is used internally for handling records; it specifies the
5826 offset in bits modulo 256 to avoid overflow cases.
5828 See the x86-64 PS ABI for details.
5832 classify_argument (enum machine_mode mode, const_tree type,
5833 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5835 HOST_WIDE_INT bytes =
5836 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5837 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5839 /* Variable sized entities are always passed/returned in memory. */
5843 if (mode != VOIDmode
5844 && targetm.calls.must_pass_in_stack (mode, type))
5847 if (type && AGGREGATE_TYPE_P (type))
5851 enum x86_64_reg_class subclasses[MAX_CLASSES];
5853 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5857 for (i = 0; i < words; i++)
5858 classes[i] = X86_64_NO_CLASS;
5860 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5861 signal the memory class, so handle them as a special case. */
5864 classes[0] = X86_64_NO_CLASS;
5868 /* Classify each field of record and merge classes. */
5869 switch (TREE_CODE (type))
5872 /* And now merge the fields of structure. */
5873 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5875 if (TREE_CODE (field) == FIELD_DECL)
5879 if (TREE_TYPE (field) == error_mark_node)
5882 /* Bitfields are always classified as integer. Handle them
5883 early, since later code would consider them to be
5884 misaligned integers. */
5885 if (DECL_BIT_FIELD (field))
5887 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5888 i < ((int_bit_position (field) + (bit_offset % 64))
5889 + tree_low_cst (DECL_SIZE (field), 0)
5892 merge_classes (X86_64_INTEGER_CLASS,
5899 type = TREE_TYPE (field);
5901 /* Flexible array member is ignored. */
5902 if (TYPE_MODE (type) == BLKmode
5903 && TREE_CODE (type) == ARRAY_TYPE
5904 && TYPE_SIZE (type) == NULL_TREE
5905 && TYPE_DOMAIN (type) != NULL_TREE
5906 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5911 if (!warned && warn_psabi)
5914 inform (input_location,
5915 "the ABI of passing struct with"
5916 " a flexible array member has"
5917 " changed in GCC 4.4");
5921 num = classify_argument (TYPE_MODE (type), type,
5923 (int_bit_position (field)
5924 + bit_offset) % 256);
5927 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5928 for (i = 0; i < num && (i + pos) < words; i++)
5930 merge_classes (subclasses[i], classes[i + pos]);
5937 /* Arrays are handled as small records. */
5940 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5941 TREE_TYPE (type), subclasses, bit_offset);
5945 /* The partial classes are now full classes. */
5946 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5947 subclasses[0] = X86_64_SSE_CLASS;
5948 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5949 && !((bit_offset % 64) == 0 && bytes == 4))
5950 subclasses[0] = X86_64_INTEGER_CLASS;
5952 for (i = 0; i < words; i++)
5953 classes[i] = subclasses[i % num];
5958 case QUAL_UNION_TYPE:
5959 /* Unions are similar to RECORD_TYPE but offset is always 0.
5961 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5963 if (TREE_CODE (field) == FIELD_DECL)
5967 if (TREE_TYPE (field) == error_mark_node)
5970 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5971 TREE_TYPE (field), subclasses,
5975 for (i = 0; i < num; i++)
5976 classes[i] = merge_classes (subclasses[i], classes[i]);
5987 /* When size > 16 bytes, if the first class isn't
5988 X86_64_SSE_CLASS or any of the other ones isn't
5989 X86_64_SSEUP_CLASS, everything should be passed in
5991 if (classes[0] != X86_64_SSE_CLASS)
5994 for (i = 1; i < words; i++)
5995 if (classes[i] != X86_64_SSEUP_CLASS)
5999 /* Final merger cleanup. */
6000 for (i = 0; i < words; i++)
6002 /* If one class is MEMORY, everything should be passed in
6004 if (classes[i] == X86_64_MEMORY_CLASS)
6007 /* The X86_64_SSEUP_CLASS should always be preceded by
6008 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6009 if (classes[i] == X86_64_SSEUP_CLASS
6010 && classes[i - 1] != X86_64_SSE_CLASS
6011 && classes[i - 1] != X86_64_SSEUP_CLASS)
6013 /* The first one should never be X86_64_SSEUP_CLASS. */
6014 gcc_assert (i != 0);
6015 classes[i] = X86_64_SSE_CLASS;
6018 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6019 everything should be passed in memory. */
6020 if (classes[i] == X86_64_X87UP_CLASS
6021 && (classes[i - 1] != X86_64_X87_CLASS))
6025 /* The first one should never be X86_64_X87UP_CLASS. */
6026 gcc_assert (i != 0);
6027 if (!warned && warn_psabi)
6030 inform (input_location,
6031 "the ABI of passing union with long double"
6032 " has changed in GCC 4.4");
6040 /* Compute the alignment needed. We align all types to their natural
6041 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6042 if (mode != VOIDmode && mode != BLKmode)
6044 int mode_alignment = GET_MODE_BITSIZE (mode);
6047 mode_alignment = 128;
6048 else if (mode == XCmode)
6049 mode_alignment = 256;
6050 if (COMPLEX_MODE_P (mode))
6051 mode_alignment /= 2;
6052 /* Misaligned fields are always returned in memory. */
6053 if (bit_offset % mode_alignment)
6057 /* For V1xx modes, just use the base mode. */
6058 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6059 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6060 mode = GET_MODE_INNER (mode);
6062 /* Classification of atomic types. */
6067 classes[0] = X86_64_SSE_CLASS;
6070 classes[0] = X86_64_SSE_CLASS;
6071 classes[1] = X86_64_SSEUP_CLASS;
6081 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6083 /* Analyze last 128 bits only. */
6084 size = (size - 1) & 0x7f;
6088 classes[0] = X86_64_INTEGERSI_CLASS;
6093 classes[0] = X86_64_INTEGER_CLASS;
6096 else if (size < 64+32)
6098 classes[0] = X86_64_INTEGER_CLASS;
6099 classes[1] = X86_64_INTEGERSI_CLASS;
6102 else if (size < 64+64)
6104 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6112 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6116 /* OImode shouldn't be used directly. */
6121 if (!(bit_offset % 64))
6122 classes[0] = X86_64_SSESF_CLASS;
6124 classes[0] = X86_64_SSE_CLASS;
6127 classes[0] = X86_64_SSEDF_CLASS;
6130 classes[0] = X86_64_X87_CLASS;
6131 classes[1] = X86_64_X87UP_CLASS;
6134 classes[0] = X86_64_SSE_CLASS;
6135 classes[1] = X86_64_SSEUP_CLASS;
6138 classes[0] = X86_64_SSE_CLASS;
6139 if (!(bit_offset % 64))
6145 if (!warned && warn_psabi)
6148 inform (input_location,
6149 "the ABI of passing structure with complex float"
6150 " member has changed in GCC 4.4");
6152 classes[1] = X86_64_SSESF_CLASS;
6156 classes[0] = X86_64_SSEDF_CLASS;
6157 classes[1] = X86_64_SSEDF_CLASS;
6160 classes[0] = X86_64_COMPLEX_X87_CLASS;
6163 /* These modes are larger than 16 bytes. */
6171 classes[0] = X86_64_SSE_CLASS;
6172 classes[1] = X86_64_SSEUP_CLASS;
6173 classes[2] = X86_64_SSEUP_CLASS;
6174 classes[3] = X86_64_SSEUP_CLASS;
6182 classes[0] = X86_64_SSE_CLASS;
6183 classes[1] = X86_64_SSEUP_CLASS;
6191 classes[0] = X86_64_SSE_CLASS;
6197 gcc_assert (VECTOR_MODE_P (mode));
6202 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6204 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6205 classes[0] = X86_64_INTEGERSI_CLASS;
6207 classes[0] = X86_64_INTEGER_CLASS;
6208 classes[1] = X86_64_INTEGER_CLASS;
6209 return 1 + (bytes > 8);
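/* A worked example (illustrative, not from the original source):

     struct s { double d; long l; };

   is 16 bytes, i.e. two eightbytes; the double classifies its
   eightbyte as X86_64_SSEDF_CLASS and the long classifies the second
   as X86_64_INTEGER_CLASS, so the struct travels in one SSE and one
   integer register, e.g. %xmm0 and %rdi for the first argument.  */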
6213 /* Examine the argument and set the number of registers required in each
6214 class. Return 0 iff the parameter should be passed in memory. */
6216 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6217 int *int_nregs, int *sse_nregs)
6219 enum x86_64_reg_class regclass[MAX_CLASSES];
6220 int n = classify_argument (mode, type, regclass, 0);
6226 for (n--; n >= 0; n--)
6227 switch (regclass[n])
6229 case X86_64_INTEGER_CLASS:
6230 case X86_64_INTEGERSI_CLASS:
6233 case X86_64_SSE_CLASS:
6234 case X86_64_SSESF_CLASS:
6235 case X86_64_SSEDF_CLASS:
6238 case X86_64_NO_CLASS:
6239 case X86_64_SSEUP_CLASS:
6241 case X86_64_X87_CLASS:
6242 case X86_64_X87UP_CLASS:
6246 case X86_64_COMPLEX_X87_CLASS:
6247 return in_return ? 2 : 0;
6248 case X86_64_MEMORY_CLASS:
6254 /* Construct a container for the argument used by the GCC interface. See
6255 FUNCTION_ARG for the detailed description. */
6258 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6259 const_tree type, int in_return, int nintregs, int nsseregs,
6260 const int *intreg, int sse_regno)
6262 /* The following variables hold the static issued_error state. */
6263 static bool issued_sse_arg_error;
6264 static bool issued_sse_ret_error;
6265 static bool issued_x87_ret_error;
6267 enum machine_mode tmpmode;
6269 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6270 enum x86_64_reg_class regclass[MAX_CLASSES];
6274 int needed_sseregs, needed_intregs;
6275 rtx exp[MAX_CLASSES];
6278 n = classify_argument (mode, type, regclass, 0);
6281 if (!examine_argument (mode, type, in_return, &needed_intregs,
6284 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6287 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6288 some less clueful developer tries to use floating-point anyway. */
6289 if (needed_sseregs && !TARGET_SSE)
6293 if (!issued_sse_ret_error)
6295 error ("SSE register return with SSE disabled");
6296 issued_sse_ret_error = true;
6299 else if (!issued_sse_arg_error)
6301 error ("SSE register argument with SSE disabled");
6302 issued_sse_arg_error = true;
6307 /* Likewise, error if the ABI requires us to return values in the
6308 x87 registers and the user specified -mno-80387. */
6309 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6310 for (i = 0; i < n; i++)
6311 if (regclass[i] == X86_64_X87_CLASS
6312 || regclass[i] == X86_64_X87UP_CLASS
6313 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6315 if (!issued_x87_ret_error)
6317 error ("x87 register return with x87 disabled");
6318 issued_x87_ret_error = true;
6323 /* First construct the simple cases. Avoid SCmode, since we want to use
6324 a single register to pass this type. */
6325 if (n == 1 && mode != SCmode)
6326 switch (regclass[0])
6328 case X86_64_INTEGER_CLASS:
6329 case X86_64_INTEGERSI_CLASS:
6330 return gen_rtx_REG (mode, intreg[0]);
6331 case X86_64_SSE_CLASS:
6332 case X86_64_SSESF_CLASS:
6333 case X86_64_SSEDF_CLASS:
6334 if (mode != BLKmode)
6335 return gen_reg_or_parallel (mode, orig_mode,
6336 SSE_REGNO (sse_regno));
6338 case X86_64_X87_CLASS:
6339 case X86_64_COMPLEX_X87_CLASS:
6340 return gen_rtx_REG (mode, FIRST_STACK_REG);
6341 case X86_64_NO_CLASS:
6342 /* Zero sized array, struct or class. */
6347 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6348 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6349 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6351 && regclass[0] == X86_64_SSE_CLASS
6352 && regclass[1] == X86_64_SSEUP_CLASS
6353 && regclass[2] == X86_64_SSEUP_CLASS
6354 && regclass[3] == X86_64_SSEUP_CLASS
6356 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6359 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6360 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6361 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6362 && regclass[1] == X86_64_INTEGER_CLASS
6363 && (mode == CDImode || mode == TImode)
6364 && intreg[0] + 1 == intreg[1])
6365 return gen_rtx_REG (mode, intreg[0]);
6367 /* Otherwise figure out the entries of the PARALLEL. */
6368 for (i = 0; i < n; i++)
6372 switch (regclass[i])
6374 case X86_64_NO_CLASS:
6376 case X86_64_INTEGER_CLASS:
6377 case X86_64_INTEGERSI_CLASS:
6378 /* Merge TImodes on aligned occasions here too. */
6379 if (i * 8 + 8 > bytes)
6380 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6381 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6385 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
6386 if (tmpmode == BLKmode)
6388 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6389 gen_rtx_REG (tmpmode, *intreg),
6393 case X86_64_SSESF_CLASS:
6394 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6395 gen_rtx_REG (SFmode,
6396 SSE_REGNO (sse_regno)),
6400 case X86_64_SSEDF_CLASS:
6401 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6402 gen_rtx_REG (DFmode,
6403 SSE_REGNO (sse_regno)),
6407 case X86_64_SSE_CLASS:
6415 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6425 && regclass[1] == X86_64_SSEUP_CLASS
6426 && regclass[2] == X86_64_SSEUP_CLASS
6427 && regclass[3] == X86_64_SSEUP_CLASS);
6434 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6435 gen_rtx_REG (tmpmode,
6436 SSE_REGNO (sse_regno)),
6445 /* Empty aligned struct, union or class. */
6449 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6450 for (i = 0; i < nexps; i++)
6451 XVECEXP (ret, 0, i) = exp [i];
6455 /* Update the data in CUM to advance over an argument of mode MODE
6456 and data type TYPE. (TYPE is null for libcalls where that information
6457 may not be available.) */
6460 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6461 const_tree type, HOST_WIDE_INT bytes,
6462 HOST_WIDE_INT words)
6478 cum->words += words;
6479 cum->nregs -= words;
6480 cum->regno += words;
6482 if (cum->nregs <= 0)
6490 /* OImode shouldn't be used directly. */
6494 if (cum->float_in_sse < 2)
6497 if (cum->float_in_sse < 1)
6514 if (!type || !AGGREGATE_TYPE_P (type))
6516 cum->sse_words += words;
6517 cum->sse_nregs -= 1;
6518 cum->sse_regno += 1;
6519 if (cum->sse_nregs <= 0)
6533 if (!type || !AGGREGATE_TYPE_P (type))
6535 cum->mmx_words += words;
6536 cum->mmx_nregs -= 1;
6537 cum->mmx_regno += 1;
6538 if (cum->mmx_nregs <= 0)
6549 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6550 const_tree type, HOST_WIDE_INT words, bool named)
6552 int int_nregs, sse_nregs;
6554 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6555 if (!named && VALID_AVX256_REG_MODE (mode))
6558 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6559 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6561 cum->nregs -= int_nregs;
6562 cum->sse_nregs -= sse_nregs;
6563 cum->regno += int_nregs;
6564 cum->sse_regno += sse_nregs;
6568 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6569 cum->words = (cum->words + align - 1) & ~(align - 1);
6570 cum->words += words;
6575 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6576 HOST_WIDE_INT words)
6578 /* Otherwise, this should be passed indirectly. */
6579 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6581 cum->words += words;
6589 /* Update the data in CUM to advance over an argument of mode MODE and
6590 data type TYPE. (TYPE is null for libcalls where that information
6591 may not be available.) */
6594 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6595 const_tree type, bool named)
6597 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6598 HOST_WIDE_INT bytes, words;
6600 if (mode == BLKmode)
6601 bytes = int_size_in_bytes (type);
6603 bytes = GET_MODE_SIZE (mode);
6604 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6607 mode = type_natural_mode (type, NULL);
6609 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6610 function_arg_advance_ms_64 (cum, bytes, words);
6611 else if (TARGET_64BIT)
6612 function_arg_advance_64 (cum, mode, type, words, named);
6614 function_arg_advance_32 (cum, mode, type, bytes, words);
6617 /* Define where to put the arguments to a function.
6618 Value is zero to push the argument on the stack,
6619 or a hard register in which to store the argument.
6621 MODE is the argument's machine mode.
6622 TYPE is the data type of the argument (as a tree).
6623 This is null for libcalls where that information may
6625 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6626 the preceding args and about the function being called.
6627 NAMED is nonzero if this argument is a named parameter
6628 (otherwise it is an extra parameter matching an ellipsis). */
6631 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6632 enum machine_mode orig_mode, const_tree type,
6633 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6635 static bool warnedsse, warnedmmx;
6637 /* Avoid the AL settings for the Unix64 ABI. */
6638 if (mode == VOIDmode)
6654 if (words <= cum->nregs)
6656 int regno = cum->regno;
6658 /* Fastcall allocates the first two DWORD (SImode) or
6659 smaller arguments to ECX and EDX if it isn't an
6665 || (type && AGGREGATE_TYPE_P (type)))
6668 /* ECX, not EAX, is the first allocated register. */
6669 if (regno == AX_REG)
6672 return gen_rtx_REG (mode, regno);
6677 if (cum->float_in_sse < 2)
6680 if (cum->float_in_sse < 1)
6684 /* In 32bit, we pass TImode in xmm registers. */
6691 if (!type || !AGGREGATE_TYPE_P (type))
6693 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6696 warning (0, "SSE vector argument without SSE enabled "
6700 return gen_reg_or_parallel (mode, orig_mode,
6701 cum->sse_regno + FIRST_SSE_REG);
6706 /* OImode shouldn't be used directly. */
6715 if (!type || !AGGREGATE_TYPE_P (type))
6718 return gen_reg_or_parallel (mode, orig_mode,
6719 cum->sse_regno + FIRST_SSE_REG);
6729 if (!type || !AGGREGATE_TYPE_P (type))
6731 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6734 warning (0, "MMX vector argument without MMX enabled "
6738 return gen_reg_or_parallel (mode, orig_mode,
6739 cum->mmx_regno + FIRST_MMX_REG);
6748 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6749 enum machine_mode orig_mode, const_tree type, bool named)
6751 /* Handle a hidden AL argument containing the number of SSE registers
6752 used by varargs x86-64 functions. */
6753 if (mode == VOIDmode)
6754 return GEN_INT (cum->maybe_vaarg
6755 ? (cum->sse_nregs < 0
6756 ? X86_64_SSE_REGPARM_MAX
6771 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6777 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6779 &x86_64_int_parameter_registers [cum->regno],
6784 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6785 enum machine_mode orig_mode, bool named,
6786 HOST_WIDE_INT bytes)
6790 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6791 We use the value -2 to specify that the current function call is MSABI. */
6792 if (mode == VOIDmode)
6793 return GEN_INT (-2);
6795 /* If we've run out of registers, it goes on the stack. */
6796 if (cum->nregs == 0)
6799 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6801 /* Only floating point modes are passed in anything but integer regs. */
6802 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6805 regno = cum->regno + FIRST_SSE_REG;
6810 /* Unnamed floating parameters are passed in both the
6811 SSE and integer registers. */
6812 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6813 t2 = gen_rtx_REG (mode, regno);
6814 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6815 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6816 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
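  /* E.g. an unnamed double in the third argument slot is passed in both
     %xmm2 and %r8 (illustrative; the Windows x64 convention duplicates
     unnamed floating arguments into the matching integer register so a
     varargs callee can spill them without knowing their types).  */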
6819 /* Handle aggregate types passed in registers. */
6820 if (orig_mode == BLKmode)
6822 if (bytes > 0 && bytes <= 8)
6823 mode = (bytes > 4 ? DImode : SImode);
6824 if (mode == BLKmode)
6828 return gen_reg_or_parallel (mode, orig_mode, regno);
6831 /* Return where to put the arguments to a function.
6832 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6834 MODE is the argument's machine mode. TYPE is the data type of the
6835 argument. It is null for libcalls where that information may not be
6836 available. CUM gives information about the preceding args and about
6837 the function being called. NAMED is nonzero if this argument is a
6838 named parameter (otherwise it is an extra parameter matching an
6842 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6843 const_tree type, bool named)
6845 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6846 enum machine_mode mode = omode;
6847 HOST_WIDE_INT bytes, words;
6850 if (mode == BLKmode)
6851 bytes = int_size_in_bytes (type);
6853 bytes = GET_MODE_SIZE (mode);
6854 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6856 /* To simplify the code below, represent vector types with a vector mode
6857 even if MMX/SSE are not active. */
6858 if (type && TREE_CODE (type) == VECTOR_TYPE)
6859 mode = type_natural_mode (type, cum);
6861 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6862 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6863 else if (TARGET_64BIT)
6864 arg = function_arg_64 (cum, mode, omode, type, named);
6866 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6868 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6870 /* This argument uses 256bit AVX modes. */
6872 cum->callee_pass_avx256_p = true;
6874 cfun->machine->caller_pass_avx256_p = true;
6877 if (cum->caller && mode == VOIDmode)
6879 /* This function is called with MODE == VOIDmode immediately
6880 before the call instruction is emitted. We copy callee 256bit
6881 AVX info from the current CUM here. */
6882 cfun->machine->callee_return_avx256_p = cum->callee_return_avx256_p;
6883 cfun->machine->callee_pass_avx256_p = cum->callee_pass_avx256_p;
6889 /* A C expression that indicates when an argument must be passed by
6890 reference. If nonzero for an argument, a copy of that argument is
6891 made in memory and a pointer to the argument is passed instead of
6892 the argument itself. The pointer is passed in whatever way is
6893 appropriate for passing a pointer to that type. */
6896 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6897 enum machine_mode mode ATTRIBUTE_UNUSED,
6898 const_tree type, bool named ATTRIBUTE_UNUSED)
6900 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6902 /* See Windows x64 Software Convention. */
6903 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6905 int msize = (int) GET_MODE_SIZE (mode);
6908 /* Arrays are passed by reference. */
6909 if (TREE_CODE (type) == ARRAY_TYPE)
6912 if (AGGREGATE_TYPE_P (type))
6914 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6915 are passed by reference. */
6916 msize = int_size_in_bytes (type);
6920 /* __m128 is passed by reference. */
6922 case 1: case 2: case 4: case 8:
6928 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
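/* Illustrative examples of the Windows x64 rules above (not from the
   original source):

     struct a { char c[3]; };    3 bytes -> passed by reference
     struct b { int x, y; };     8 bytes -> passed in a register
     __m128                     16 bytes -> passed by reference

   Only arguments of exactly 1, 2, 4, or 8 bytes travel directly in a
   register under this ABI.  */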
6934 /* Return true when TYPE should be 128bit aligned for 32bit argument
6935 passing ABI. XXX: This function is obsolete and is only used for
6936 checking psABI compatibility with previous versions of GCC. */
6939 ix86_compat_aligned_value_p (const_tree type)
6941 enum machine_mode mode = TYPE_MODE (type);
6942 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6946 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6948 if (TYPE_ALIGN (type) < 128)
6951 if (AGGREGATE_TYPE_P (type))
6953 /* Walk the aggregates recursively. */
6954 switch (TREE_CODE (type))
6958 case QUAL_UNION_TYPE:
6962 /* Walk all the structure fields. */
6963 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6965 if (TREE_CODE (field) == FIELD_DECL
6966 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6973 /* Just in case some languages pass arrays by value. */
6974 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6985 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6986 XXX: This function is obsolete and is only used for checking psABI
6987 compatibility with previous versions of GCC. */
6990 ix86_compat_function_arg_boundary (enum machine_mode mode,
6991 const_tree type, unsigned int align)
6993 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6994 natural boundaries. */
6995 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6997 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6998 make an exception for SSE modes since these require 128bit
7001 The handling here differs from field_alignment. ICC aligns MMX
7002 arguments to 4 byte boundaries, while structure fields are aligned
7003 to 8 byte boundaries. */
7006 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7007 align = PARM_BOUNDARY;
7011 if (!ix86_compat_aligned_value_p (type))
7012 align = PARM_BOUNDARY;
7015 if (align > BIGGEST_ALIGNMENT)
7016 align = BIGGEST_ALIGNMENT;
7020 /* Return true when TYPE should be 128bit aligned for 32bit argument
7024 ix86_contains_aligned_value_p (const_tree type)
7026 enum machine_mode mode = TYPE_MODE (type);
7028 if (mode == XFmode || mode == XCmode)
7031 if (TYPE_ALIGN (type) < 128)
7034 if (AGGREGATE_TYPE_P (type))
7036 /* Walk the aggregates recursively. */
7037 switch (TREE_CODE (type))
7041 case QUAL_UNION_TYPE:
7045 /* Walk all the structure fields. */
7046 for (field = TYPE_FIELDS (type);
7048 field = DECL_CHAIN (field))
7050 if (TREE_CODE (field) == FIELD_DECL
7051 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7058 /* Just in case some languages pass arrays by value. */
7059 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7068 return TYPE_ALIGN (type) >= 128;
7073 /* Gives the alignment boundary, in bits, of an argument with the
7074 specified mode and type. */
7077 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7082 /* Since the main variant type is used for the call, we convert the
7083 type to its main variant. */
7084 type = TYPE_MAIN_VARIANT (type);
7085 align = TYPE_ALIGN (type);
7088 align = GET_MODE_ALIGNMENT (mode);
7089 if (align < PARM_BOUNDARY)
7090 align = PARM_BOUNDARY;
7094 unsigned int saved_align = align;
7098 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7101 if (mode == XFmode || mode == XCmode)
7102 align = PARM_BOUNDARY;
7104 else if (!ix86_contains_aligned_value_p (type))
7105 align = PARM_BOUNDARY;
7108 align = PARM_BOUNDARY;
7113 && align != ix86_compat_function_arg_boundary (mode, type,
7117 inform (input_location,
7118 "The ABI for passing parameters with %d-byte"
7119 " alignment has changed in GCC 4.6",
7120 align / BITS_PER_UNIT);
7127 /* Return true if N is a possible register number for a function value. */
7130 ix86_function_value_regno_p (const unsigned int regno)
7139 return TARGET_64BIT && ix86_abi != MS_ABI;
7141 /* Complex values are returned in %st(0)/%st(1) pair. */
7144 /* TODO: The function should depend on current function ABI but
7145 builtins.c would need updating then. Therefore we use the
7147 if (TARGET_64BIT && ix86_abi == MS_ABI)
7149 return TARGET_FLOAT_RETURNS_IN_80387;
7151 /* Complex values are returned in %xmm0/%xmm1 pair. */
7157 if (TARGET_MACHO || TARGET_64BIT)
7165 /* Define how to find the value returned by a function.
7166 VALTYPE is the data type of the value (as a tree).
7167 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7168 otherwise, FUNC is 0. */
7171 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7172 const_tree fntype, const_tree fn)
7176 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7177 we normally prevent this case when mmx is not available. However
7178 some ABIs may require the result to be returned like DImode. */
7179 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7180 regno = FIRST_MMX_REG;
7182 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7183 we prevent this case when sse is not available. However some ABIs
7184 may require the result to be returned like integer TImode. */
7185 else if (mode == TImode
7186 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7187 regno = FIRST_SSE_REG;
7189 /* 32-byte vector modes in %ymm0. */
7190 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7191 regno = FIRST_SSE_REG;
7193 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7194 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7195 regno = FIRST_FLOAT_REG;
7197 /* Most things go in %eax. */
7200 /* Override FP return register with %xmm0 for local functions when
7201 SSE math is enabled or for functions with sseregparm attribute. */
7202 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7204 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7205 if ((sse_level >= 1 && mode == SFmode)
7206 || (sse_level == 2 && mode == DFmode))
7207 regno = FIRST_SSE_REG;
7210 /* OImode shouldn't be used directly. */
7211 gcc_assert (mode != OImode);
7213 return gen_rtx_REG (orig_mode, regno);
7217 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7222 /* Handle libcalls, which don't provide a type node. */
7223 if (valtype == NULL)
7237 regno = FIRST_SSE_REG;
7241 regno = FIRST_FLOAT_REG;
7249 return gen_rtx_REG (mode, regno);
7251 else if (POINTER_TYPE_P (valtype))
7253 /* Pointers are always returned in Pmode. */
7257 ret = construct_container (mode, orig_mode, valtype, 1,
7258 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7259 x86_64_int_return_registers, 0);
7261 /* For zero sized structures, construct_container returns NULL, but we
7262 need to keep the rest of the compiler happy by returning a meaningful value. */
7264 ret = gen_rtx_REG (orig_mode, AX_REG);
7270 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7272 unsigned int regno = AX_REG;
7276 switch (GET_MODE_SIZE (mode))
7279 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7280 && !COMPLEX_MODE_P (mode))
7281 regno = FIRST_SSE_REG;
7285 if (mode == SFmode || mode == DFmode)
7286 regno = FIRST_SSE_REG;
7292 return gen_rtx_REG (orig_mode, regno);
7296 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7297 enum machine_mode orig_mode, enum machine_mode mode)
7299 const_tree fn, fntype;
7302 if (fntype_or_decl && DECL_P (fntype_or_decl))
7303 fn = fntype_or_decl;
7304 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7306 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7307 return function_value_ms_64 (orig_mode, mode);
7308 else if (TARGET_64BIT)
7309 return function_value_64 (orig_mode, mode, valtype);
7311 return function_value_32 (orig_mode, mode, fntype, fn);
7315 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7316 bool outgoing ATTRIBUTE_UNUSED)
7318 enum machine_mode mode, orig_mode;
7320 orig_mode = TYPE_MODE (valtype);
7321 mode = type_natural_mode (valtype, NULL);
7322 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7325 /* Pointer function arguments and return values are promoted to Pmode. */
7327 static enum machine_mode
7328 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7329 int *punsignedp, const_tree fntype,
7332 if (type != NULL_TREE && POINTER_TYPE_P (type))
7334 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7337 return default_promote_function_mode (type, mode, punsignedp, fntype,
7342 ix86_libcall_value (enum machine_mode mode)
7344 return ix86_function_value_1 (NULL, NULL, mode, mode);
7347 /* Return true iff type is returned in memory. */
7349 static bool ATTRIBUTE_UNUSED
7350 return_in_memory_32 (const_tree type, enum machine_mode mode)
7354 if (mode == BLKmode)
7357 size = int_size_in_bytes (type);
7359 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7362 if (VECTOR_MODE_P (mode) || mode == TImode)
7364 /* User-created vectors small enough to fit in EAX. */
7368 /* MMX/3dNow values are returned in MM0,
7369 except when MMX doesn't exist or the ABI prescribes otherwise. */
7371 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7373 /* SSE values are returned in XMM0, except when it doesn't exist. */
7377 /* AVX values are returned in YMM0, except when it doesn't exist. */
7388 /* OImode shouldn't be used directly. */
7389 gcc_assert (mode != OImode);
7394 static bool ATTRIBUTE_UNUSED
7395 return_in_memory_64 (const_tree type, enum machine_mode mode)
7397 int needed_intregs, needed_sseregs;
7398 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7401 static bool ATTRIBUTE_UNUSED
7402 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7404 HOST_WIDE_INT size = int_size_in_bytes (type);
7406 /* __m128 is returned in xmm0. */
7407 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7408 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7411 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
7412 return size != 1 && size != 2 && size != 4 && size != 8;
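/* For example (illustrative only): under the MS ABI a 16-byte __m128
   comes back in %xmm0 and an 8-byte struct in %rax, while a 12-byte
   struct is returned through a hidden pointer supplied by the caller.  */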
7416 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7418 #ifdef SUBTARGET_RETURN_IN_MEMORY
7419 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7421 const enum machine_mode mode = type_natural_mode (type, NULL);
7425 if (ix86_function_type_abi (fntype) == MS_ABI)
7426 return return_in_memory_ms_64 (type, mode);
7428 return return_in_memory_64 (type, mode);
7431 return return_in_memory_32 (type, mode);
7435 /* When returning SSE vector types, we have a choice of either
7436 (1) being abi incompatible with a -march switch, or
7437 (2) generating an error.
7438 Given no good solution, I think the safest thing is one warning.
7439 The user won't be able to use -Werror, but....
7441 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7442 called in response to actually generating a caller or callee that
7443 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7444 via aggregate_value_p for general type probing from tree-ssa. */
7447 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7449 static bool warnedsse, warnedmmx;
7451 if (!TARGET_64BIT && type)
7453 /* Look at the return type of the function, not the function type. */
7454 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7456 if (!TARGET_SSE && !warnedsse)
7459 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7462 warning (0, "SSE vector return without SSE enabled "
7467 if (!TARGET_MMX && !warnedmmx)
7469 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7472 warning (0, "MMX vector return without MMX enabled "
7482 /* Create the va_list data type. */
7484 /* Returns the calling-convention-specific va_list data type.
7485 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7488 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7490 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7492 /* For i386 we use a plain pointer to the argument area. */
7493 if (!TARGET_64BIT || abi == MS_ABI)
7494 return build_pointer_type (char_type_node);
7496 record = lang_hooks.types.make_type (RECORD_TYPE);
7497 type_decl = build_decl (BUILTINS_LOCATION,
7498 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7500 f_gpr = build_decl (BUILTINS_LOCATION,
7501 FIELD_DECL, get_identifier ("gp_offset"),
7502 unsigned_type_node);
7503 f_fpr = build_decl (BUILTINS_LOCATION,
7504 FIELD_DECL, get_identifier ("fp_offset"),
7505 unsigned_type_node);
7506 f_ovf = build_decl (BUILTINS_LOCATION,
7507 FIELD_DECL, get_identifier ("overflow_arg_area"),
7509 f_sav = build_decl (BUILTINS_LOCATION,
7510 FIELD_DECL, get_identifier ("reg_save_area"),
7513 va_list_gpr_counter_field = f_gpr;
7514 va_list_fpr_counter_field = f_fpr;
7516 DECL_FIELD_CONTEXT (f_gpr) = record;
7517 DECL_FIELD_CONTEXT (f_fpr) = record;
7518 DECL_FIELD_CONTEXT (f_ovf) = record;
7519 DECL_FIELD_CONTEXT (f_sav) = record;
7521 TYPE_STUB_DECL (record) = type_decl;
7522 TYPE_NAME (record) = type_decl;
7523 TYPE_FIELDS (record) = f_gpr;
7524 DECL_CHAIN (f_gpr) = f_fpr;
7525 DECL_CHAIN (f_fpr) = f_ovf;
7526 DECL_CHAIN (f_ovf) = f_sav;
7528 layout_type (record);
7530 /* The correct type is an array type of one element. */
7531 return build_array_type (record, build_index_type (size_zero_node));
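/* A minimal sketch (illustrative, not part of the original source) of
   what the record built above looks like at the C level; the field
   names are taken from the code above, while the typedef name
   __builtin_va_list is an assumption.  */
#if 0
typedef struct __va_list_tag
{
  unsigned int gp_offset;	/* Byte offset of the next GP register
				   in the register save area.  */
  unsigned int fp_offset;	/* Byte offset of the next SSE register.  */
  void *overflow_arg_area;	/* Next stack-passed argument.  */
  void *reg_save_area;		/* Base of the register save area.  */
} __va_list_tag;
typedef __va_list_tag __builtin_va_list[1];	/* Array of one element.  */
#endif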
7534 /* Set up the builtin va_list data type and, for 64-bit, the additional
7535 calling-convention-specific va_list data types. */
7538 ix86_build_builtin_va_list (void)
7540 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7542 /* Initialize the ABI-specific va_list builtin types. */
7546 if (ix86_abi == MS_ABI)
7548 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7549 if (TREE_CODE (t) != RECORD_TYPE)
7550 t = build_variant_type_copy (t);
7551 sysv_va_list_type_node = t;
7556 if (TREE_CODE (t) != RECORD_TYPE)
7557 t = build_variant_type_copy (t);
7558 sysv_va_list_type_node = t;
7560 if (ix86_abi != MS_ABI)
7562 t = ix86_build_builtin_va_list_abi (MS_ABI);
7563 if (TREE_CODE (t) != RECORD_TYPE)
7564 t = build_variant_type_copy (t);
7565 ms_va_list_type_node = t;
7570 if (TREE_CODE (t) != RECORD_TYPE)
7571 t = build_variant_type_copy (t);
7572 ms_va_list_type_node = t;
7579 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7582 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7588 /* GPR size of varargs save area. */
7589 if (cfun->va_list_gpr_size)
7590 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7592 ix86_varargs_gpr_size = 0;
7594 /* FPR size of varargs save area. We don't need it if we don't pass
7595 anything in SSE registers. */
7596 if (TARGET_SSE && cfun->va_list_fpr_size)
7597 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7599 ix86_varargs_fpr_size = 0;
7601 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7604 save_area = frame_pointer_rtx;
7605 set = get_varargs_alias_set ();
7607 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7608 if (max > X86_64_REGPARM_MAX)
7609 max = X86_64_REGPARM_MAX;
7611 for (i = cum->regno; i < max; i++)
7613 mem = gen_rtx_MEM (Pmode,
7614 plus_constant (save_area, i * UNITS_PER_WORD));
7615 MEM_NOTRAP_P (mem) = 1;
7616 set_mem_alias_set (mem, set);
7617 emit_move_insn (mem, gen_rtx_REG (Pmode,
7618 x86_64_int_parameter_registers[i]));
7621 if (ix86_varargs_fpr_size)
7623 enum machine_mode smode;
7626 /* Now emit code to save SSE registers. The AX parameter contains the
7627 number of SSE parameter registers used to call this function, though all
7628 we actually check here is the zero/non-zero status. */
7630 label = gen_label_rtx ();
7631 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7632 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7635 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7636 we used movdqa (i.e. TImode) instead? Perhaps even better would
7637 be if we could determine the real mode of the data, via a hook
7638 into pass_stdarg. Ignore all that for now. */
7640 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7641 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7643 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7644 if (max > X86_64_SSE_REGPARM_MAX)
7645 max = X86_64_SSE_REGPARM_MAX;
7647 for (i = cum->sse_regno; i < max; ++i)
7649 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7650 mem = gen_rtx_MEM (smode, mem);
7651 MEM_NOTRAP_P (mem) = 1;
7652 set_mem_alias_set (mem, set);
7653 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7655 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
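  /* Layout of the register save area filled in above (illustrative):

	byte   0 ..  47   %rdi, %rsi, %rdx, %rcx, %r8, %r9   (6 x 8 bytes)
	byte  48 .. 175   %xmm0 .. %xmm7                     (8 x 16 bytes)

     gp_offset and fp_offset in the va_list index into this block;
     either half is omitted when the corresponding va_list_*_size
     is zero.  */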
7663 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7665 alias_set_type set = get_varargs_alias_set ();
7668 /* Reset to zero, as there might be a sysv va_arg used
7670 ix86_varargs_gpr_size = 0;
7671 ix86_varargs_fpr_size = 0;
7673 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7677 mem = gen_rtx_MEM (Pmode,
7678 plus_constant (virtual_incoming_args_rtx,
7679 i * UNITS_PER_WORD));
7680 MEM_NOTRAP_P (mem) = 1;
7681 set_mem_alias_set (mem, set);
7683 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7684 emit_move_insn (mem, reg);
7689 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7690 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7693 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7694 CUMULATIVE_ARGS next_cum;
7697 /* This argument doesn't appear to be used anymore. Which is good,
7698 because the old code here didn't suppress rtl generation. */
7699 gcc_assert (!no_rtl);
7704 fntype = TREE_TYPE (current_function_decl);
7706 /* For varargs, we do not want to skip the dummy va_dcl argument.
7707 For stdargs, we do want to skip the last named argument. */
7709 if (stdarg_p (fntype))
7710 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7713 if (cum->call_abi == MS_ABI)
7714 setup_incoming_varargs_ms_64 (&next_cum);
7716 setup_incoming_varargs_64 (&next_cum);
7719 /* Check whether TYPE is a va_list of the plain char * kind. */
7722 is_va_list_char_pointer (tree type)
7726 /* For 32-bit it is always true. */
7729 canonic = ix86_canonical_va_list_type (type);
7730 return (canonic == ms_va_list_type_node
7731 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7734 /* Implement va_start. */
7737 ix86_va_start (tree valist, rtx nextarg)
7739 HOST_WIDE_INT words, n_gpr, n_fpr;
7740 tree f_gpr, f_fpr, f_ovf, f_sav;
7741 tree gpr, fpr, ovf, sav, t;
7745 if (flag_split_stack
7746 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7748 unsigned int scratch_regno;
7750 /* When we are splitting the stack, we can't refer to the stack
7751 arguments using internal_arg_pointer, because they may be on
7752 the old stack. The split stack prologue will arrange to
7753 leave a pointer to the old stack arguments in a scratch
7754 register, which we here copy to a pseudo-register. The split
7755 stack prologue can't set the pseudo-register directly because
7756 it (the prologue) runs before any registers have been saved. */
7758 scratch_regno = split_stack_prologue_scratch_regno ();
7759 if (scratch_regno != INVALID_REGNUM)
7763 reg = gen_reg_rtx (Pmode);
7764 cfun->machine->split_stack_varargs_pointer = reg;
7767 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7771 push_topmost_sequence ();
7772 emit_insn_after (seq, entry_of_function ());
7773 pop_topmost_sequence ();
7777 /* Only the 64-bit target needs something special. */
7778 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7780 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7781 std_expand_builtin_va_start (valist, nextarg);
7786 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7787 next = expand_binop (ptr_mode, add_optab,
7788 cfun->machine->split_stack_varargs_pointer,
7789 crtl->args.arg_offset_rtx,
7790 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7791 convert_move (va_r, next, 0);
7796 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7797 f_fpr = DECL_CHAIN (f_gpr);
7798 f_ovf = DECL_CHAIN (f_fpr);
7799 f_sav = DECL_CHAIN (f_ovf);
7801 valist = build_simple_mem_ref (valist);
7802 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7803 /* The following should be folded into the MEM_REF offset. */
7804 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7806 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7808 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7810 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7813 /* Count the number of gp and fp argument registers used. */
7814 words = crtl->args.info.words;
7815 n_gpr = crtl->args.info.regno;
7816 n_fpr = crtl->args.info.sse_regno;
7818 if (cfun->va_list_gpr_size)
7820 type = TREE_TYPE (gpr);
7821 t = build2 (MODIFY_EXPR, type,
7822 gpr, build_int_cst (type, n_gpr * 8));
7823 TREE_SIDE_EFFECTS (t) = 1;
7824 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7827 if (TARGET_SSE && cfun->va_list_fpr_size)
7829 type = TREE_TYPE (fpr);
7830 t = build2 (MODIFY_EXPR, type, fpr,
7831 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7832 TREE_SIDE_EFFECTS (t) = 1;
7833 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7836 /* Find the overflow area. */
7837 type = TREE_TYPE (ovf);
7838 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7839 ovf_rtx = crtl->args.internal_arg_pointer;
7841 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7842 t = make_tree (type, ovf_rtx);
7844 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7845 t = build2 (MODIFY_EXPR, type, ovf, t);
7846 TREE_SIDE_EFFECTS (t) = 1;
7847 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7849 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7851 /* Find the register save area.
7852 The function prologue saves it right above the stack frame. */
7853 type = TREE_TYPE (sav);
7854 t = make_tree (type, frame_pointer_rtx);
7855 if (!ix86_varargs_gpr_size)
7856 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7857 t = build2 (MODIFY_EXPR, type, sav, t);
7858 TREE_SIDE_EFFECTS (t) = 1;
7859 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
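  /* Net effect (illustrative summary): after va_start in a function
     that has consumed n_gpr integer and n_fpr SSE argument registers,

	gp_offset         = n_gpr * 8          (0 .. 48)
	fp_offset         = 48 + n_fpr * 16    (48 .. 176)
	overflow_arg_area = incoming stack args + words * UNITS_PER_WORD
	reg_save_area     = base of the save area shown earlier.  */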
7863 /* Implement va_arg. */
7866 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7869 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7870 tree f_gpr, f_fpr, f_ovf, f_sav;
7871 tree gpr, fpr, ovf, sav, t;
7873 tree lab_false, lab_over = NULL_TREE;
7878 enum machine_mode nat_mode;
7879 unsigned int arg_boundary;
7881 /* Only the 64-bit target needs something special. */
7882 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7883 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7885 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7886 f_fpr = DECL_CHAIN (f_gpr);
7887 f_ovf = DECL_CHAIN (f_fpr);
7888 f_sav = DECL_CHAIN (f_ovf);
7890 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7891 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7892 valist = build_va_arg_indirect_ref (valist);
7893 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7894 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7895 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7897 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7899 type = build_pointer_type (type);
7900 size = int_size_in_bytes (type);
7901 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7903 nat_mode = type_natural_mode (type, NULL);
7912 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7913 if (!TARGET_64BIT_MS_ABI)
7920 container = construct_container (nat_mode, TYPE_MODE (type),
7921 type, 0, X86_64_REGPARM_MAX,
7922 X86_64_SSE_REGPARM_MAX, intreg,
7927 /* Pull the value out of the saved registers. */
7929 addr = create_tmp_var (ptr_type_node, "addr");
7933 int needed_intregs, needed_sseregs;
7935 tree int_addr, sse_addr;
7937 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7938 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7940 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7942 need_temp = (!REG_P (container)
7943 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7944 || TYPE_ALIGN (type) > 128));
7946 /* In case we are passing a structure, verify that it is a consecutive
7947 block on the register save area. If not, we need to do moves. */
7948 if (!need_temp && !REG_P (container))
7950 /* Verify that all registers are strictly consecutive */
7951 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7955 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7957 rtx slot = XVECEXP (container, 0, i);
7958 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7959 || INTVAL (XEXP (slot, 1)) != i * 16)
7967 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7969 rtx slot = XVECEXP (container, 0, i);
7970 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7971 || INTVAL (XEXP (slot, 1)) != i * 8)
7983 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7984 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7987 /* First ensure that we fit completely in registers. */
7990 t = build_int_cst (TREE_TYPE (gpr),
7991 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7992 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7993 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7994 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7995 gimplify_and_add (t, pre_p);
7999 t = build_int_cst (TREE_TYPE (fpr),
8000 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8001 + X86_64_REGPARM_MAX * 8);
8002 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8003 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8004 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8005 gimplify_and_add (t, pre_p);
8008 /* Compute index to start of area used for integer regs. */
8011 /* int_addr = gpr + sav; */
8012 t = fold_build_pointer_plus (sav, gpr);
8013 gimplify_assign (int_addr, t, pre_p);
8017 /* sse_addr = fpr + sav; */
8018 t = fold_build_pointer_plus (sav, fpr);
8019 gimplify_assign (sse_addr, t, pre_p);
8023 int i, prev_size = 0;
8024 tree temp = create_tmp_var (type, "va_arg_tmp");
8027 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8028 gimplify_assign (addr, t, pre_p);
8030 for (i = 0; i < XVECLEN (container, 0); i++)
8032 rtx slot = XVECEXP (container, 0, i);
8033 rtx reg = XEXP (slot, 0);
8034 enum machine_mode mode = GET_MODE (reg);
8040 tree dest_addr, dest;
8041 int cur_size = GET_MODE_SIZE (mode);
8043 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8044 prev_size = INTVAL (XEXP (slot, 1));
8045 if (prev_size + cur_size > size)
8047 cur_size = size - prev_size;
8048 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8049 if (mode == BLKmode)
8052 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8053 if (mode == GET_MODE (reg))
8054 addr_type = build_pointer_type (piece_type);
8056 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8058 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8061 if (SSE_REGNO_P (REGNO (reg)))
8063 src_addr = sse_addr;
8064 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8068 src_addr = int_addr;
8069 src_offset = REGNO (reg) * 8;
8071 src_addr = fold_convert (addr_type, src_addr);
8072 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8074 dest_addr = fold_convert (daddr_type, addr);
8075 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8076 if (cur_size == GET_MODE_SIZE (mode))
8078 src = build_va_arg_indirect_ref (src_addr);
8079 dest = build_va_arg_indirect_ref (dest_addr);
8081 gimplify_assign (dest, src, pre_p);
8086 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8087 3, dest_addr, src_addr,
8088 size_int (cur_size));
8089 gimplify_and_add (copy, pre_p);
8091 prev_size += cur_size;
8097 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8098 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8099 gimplify_assign (gpr, t, pre_p);
8104 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8105 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8106 gimplify_assign (fpr, t, pre_p);
8109 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8111 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8114 /* ... otherwise out of the overflow area. */
8116 /* When we align a parameter on the stack for the caller, if its
8117 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8118 aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  Match the callee here
8119 with the caller. */
8120 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8121 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8122 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8124 /* Care for on-stack alignment if needed. */
8125 if (arg_boundary <= 64 || size == 0)
8129 HOST_WIDE_INT align = arg_boundary / 8;
8130 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8131 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8132 build_int_cst (TREE_TYPE (t), -align));
8135 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8136 gimplify_assign (addr, t, pre_p);
8138 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8139 gimplify_assign (unshare_expr (ovf), t, pre_p);
8142 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8144 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8145 addr = fold_convert (ptrtype, addr);
8148 addr = build_va_arg_indirect_ref (addr);
8149 return build_va_arg_indirect_ref (addr);
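/* For illustration, assuming the usual SysV x86-64 values
   X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8, the
   register save area pointed to by SAV is laid out as

	sav +   0 ... sav +  47   six integer registers, 8 bytes each
	sav +  48 ... sav + 175   eight SSE registers, 16 bytes each

   which is why GPR is checked against (6 - needed_intregs + 1) * 8
   and FPR against (8 - needed_sseregs + 1) * 16 + 6 * 8 above.  */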
8152 /* Return true if OPNUM's MEM should be matched
8153 in movabs* patterns. */
8156 ix86_check_movabs (rtx insn, int opnum)
8160 set = PATTERN (insn);
8161 if (GET_CODE (set) == PARALLEL)
8162 set = XVECEXP (set, 0, 0);
8163 gcc_assert (GET_CODE (set) == SET);
8164 mem = XEXP (set, opnum);
8165 while (GET_CODE (mem) == SUBREG)
8166 mem = SUBREG_REG (mem);
8167 gcc_assert (MEM_P (mem));
8168 return volatile_ok || !MEM_VOLATILE_P (mem);
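/* For example, on x86-64 a pattern such as

	movabs	foo, %rax

   encodes a full 64-bit absolute address, but the memory form of
   movabs works only with the accumulator; the check above merely
   rejects volatile memory unless volatile_ok permits it.  */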
8171 /* Initialize the table of extra 80387 mathematical constants. */
8174 init_ext_80387_constants (void)
8176 static const char * cst[5] =
8178 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8179 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8180 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8181 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8182 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8186 for (i = 0; i < 5; i++)
8188 real_from_string (&ext_80387_constants_table[i], cst[i]);
8189 /* Ensure each constant is rounded to XFmode precision. */
8190 real_convert (&ext_80387_constants_table[i],
8191 XFmode, &ext_80387_constants_table[i]);
8194 ext_80387_constants_init = 1;
8197 /* Return non-zero if the constant is something that
8198 can be loaded with a special instruction. */
8201 standard_80387_constant_p (rtx x)
8203 enum machine_mode mode = GET_MODE (x);
8207 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8210 if (x == CONST0_RTX (mode))
8212 if (x == CONST1_RTX (mode))
8215 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8217 /* For XFmode constants, try to find a special 80387 instruction when
8218 optimizing for size or on those CPUs that benefit from them. */
8220 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8224 if (! ext_80387_constants_init)
8225 init_ext_80387_constants ();
8227 for (i = 0; i < 5; i++)
8228 if (real_identical (&r, &ext_80387_constants_table[i]))
8232 /* A load of the constant -0.0 or -1.0 will be split into an
8233 fldz;fchs or fld1;fchs sequence. */
8234 if (real_isnegzero (&r))
8236 if (real_identical (&r, &dconstm1))
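/* (In the full function the nonzero return value encodes the insn:
   1 for fldz, 2 for fld1, i + 3 for the five table entries above,
   and two further values for the fldz;fchs and fld1;fchs splits.)  */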
8242 /* Return the opcode of the special instruction to be used to load the constant X. */
8246 standard_80387_constant_opcode (rtx x)
8248 switch (standard_80387_constant_p (x))
8272 /* Return the CONST_DOUBLE representing the 80387 constant that is
8273 loaded by the specified special instruction. The argument IDX
8274 matches the return value from standard_80387_constant_p. */
8277 standard_80387_constant_rtx (int idx)
8281 if (! ext_80387_constants_init)
8282 init_ext_80387_constants ();
8298 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8302 /* Return 1 if X is all zeros and 2 if X is all ones,
8303 in a supported SSE/AVX vector mode. */
8306 standard_sse_constant_p (rtx x)
8308 enum machine_mode mode = GET_MODE (x);
8310 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8312 if (vector_all_ones_operand (x, mode))
8334 /* Return the opcode of the special instruction to be used to load the constant X. */
8338 standard_sse_constant_opcode (rtx insn, rtx x)
8340 switch (standard_sse_constant_p (x))
8343 switch (get_attr_mode (insn))
8346 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8347 return "%vpxor\t%0, %d0";
8349 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8350 return "%vxorpd\t%0, %d0";
8352 return "%vxorps\t%0, %d0";
8355 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8356 return "vpxor\t%x0, %x0, %x0";
8358 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8359 return "vxorpd\t%x0, %x0, %x0";
8361 return "vxorps\t%x0, %x0, %x0";
8369 return "vpcmpeqd\t%0, %0, %0";
8371 return "pcmpeqd\t%0, %0";
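/* So, for example, an all-zero V4SF constant is loaded with
   "xorps %xmm0, %xmm0" (the %v prefix selects the AVX form when
   available) and an all-ones vector with "pcmpeqd %xmm0, %xmm0";
   both avoid a constant pool load entirely.  */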
8379 /* Returns true if OP contains a symbol reference. */
8382 symbolic_reference_mentioned_p (rtx op)
8387 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8390 fmt = GET_RTX_FORMAT (GET_CODE (op));
8391 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8397 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8398 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8402 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
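/* The walk above relies on the generic rtx format strings: 'E'
   marks a vector of sub-rtxes (scanned with XVECLEN/XVECEXP) and
   'e' a single sub-expression (scanned with XEXP), so every nested
   operand of OP is eventually visited.  */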
8409 /* Return true if it is appropriate to emit `ret' instructions in the
8410 body of a function. Do this only if the epilogue is simple, needing a
8411 couple of insns. Prior to reloading, we can't tell how many registers
8412 must be saved, so return false then. Return false if there is no frame
8413 marker to de-allocate. */
8416 ix86_can_use_return_insn_p (void)
8418 struct ix86_frame frame;
8420 if (! reload_completed || frame_pointer_needed)
8423 /* Don't allow more than 32k pop, since that's all we can do
8424 with one instruction. */
8425 if (crtl->args.pops_args && crtl->args.size >= 32768)
8428 ix86_compute_frame_layout (&frame);
8429 return (frame.stack_pointer_offset == UNITS_PER_WORD
8430 && (frame.nregs + frame.nsseregs) == 0);
8433 /* Value should be nonzero if functions must have frame pointers.
8434 Zero means the frame pointer need not be set up (and parms may
8435 be accessed via the stack pointer) in functions that seem suitable. */
8438 ix86_frame_pointer_required (void)
8440 /* If we accessed previous frames, then the generated code expects
8441 to be able to access the saved ebp value in our frame. */
8442 if (cfun->machine->accesses_prev_frame)
8445 /* Several x86 OSes need a frame pointer for other reasons,
8446 usually pertaining to setjmp. */
8447 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8450 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
8451 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
8454 /* For Win64 SEH, very large frames need a frame pointer, as the
8455 maximum stack allocation is 4GB. */
8456 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
8459 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8460 turns off the frame pointer by default.  Turn it back on now if
8461 the function is not a leaf. */
8462 if (TARGET_OMIT_LEAF_FRAME_POINTER
8463 && (!current_function_is_leaf
8464 || ix86_current_function_calls_tls_descriptor))
8467 if (crtl->profile && !flag_fentry)
8473 /* Record that the current function accesses previous call frames. */
8476 ix86_setup_frame_addresses (void)
8478 cfun->machine->accesses_prev_frame = 1;
8481 #ifndef USE_HIDDEN_LINKONCE
8482 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8483 # define USE_HIDDEN_LINKONCE 1
8485 # define USE_HIDDEN_LINKONCE 0
8489 static int pic_labels_used;
8491 /* Fills in the label name that should be used for a pc thunk for
8492 the given register. */
8495 get_pc_thunk_name (char name[32], unsigned int regno)
8497 gcc_assert (!TARGET_64BIT);
8499 if (USE_HIDDEN_LINKONCE)
8500 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
8502 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8506 /* This function generates the pc thunks used by -fpic code: each loads
8507 its register with the return address of the caller and then returns. */
8510 ix86_code_end (void)
8515 for (regno = AX_REG; regno <= SP_REG; regno++)
8520 if (!(pic_labels_used & (1 << regno)))
8523 get_pc_thunk_name (name, regno);
8525 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8526 get_identifier (name),
8527 build_function_type_list (void_type_node, NULL_TREE));
8528 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8529 NULL_TREE, void_type_node);
8530 TREE_PUBLIC (decl) = 1;
8531 TREE_STATIC (decl) = 1;
8536 switch_to_section (darwin_sections[text_coal_section]);
8537 fputs ("\t.weak_definition\t", asm_out_file);
8538 assemble_name (asm_out_file, name);
8539 fputs ("\n\t.private_extern\t", asm_out_file);
8540 assemble_name (asm_out_file, name);
8541 putc ('\n', asm_out_file);
8542 ASM_OUTPUT_LABEL (asm_out_file, name);
8543 DECL_WEAK (decl) = 1;
8547 if (USE_HIDDEN_LINKONCE)
8549 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8551 targetm.asm_out.unique_section (decl, 0);
8552 switch_to_section (get_named_section (decl, NULL, 0));
8554 targetm.asm_out.globalize_label (asm_out_file, name);
8555 fputs ("\t.hidden\t", asm_out_file);
8556 assemble_name (asm_out_file, name);
8557 putc ('\n', asm_out_file);
8558 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8562 switch_to_section (text_section);
8563 ASM_OUTPUT_LABEL (asm_out_file, name);
8566 DECL_INITIAL (decl) = make_node (BLOCK);
8567 current_function_decl = decl;
8568 init_function_start (decl);
8569 first_function_block_is_cold = false;
8570 /* Make sure unwind info is emitted for the thunk if needed. */
8571 final_start_function (emit_barrier (), asm_out_file, 1);
8573 /* Pad stack IP move with 4 instructions (two NOPs count
8574 as one instruction). */
8575 if (TARGET_PAD_SHORT_FUNCTION)
8580 fputs ("\tnop\n", asm_out_file);
8583 xops[0] = gen_rtx_REG (Pmode, regno);
8584 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8585 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8586 fputs ("\tret\n", asm_out_file);
8587 final_end_function ();
8588 init_insn_lengths ();
8589 free_after_compilation (cfun);
8591 current_function_decl = NULL;
8594 if (flag_split_stack)
8595 file_end_indicate_split_stack ();
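/* For reference, the thunk emitted above for, say, %ebx is just

	__x86.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies its own return address (the caller's PC) into the
   destination register.  */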
8598 /* Emit code for the SET_GOT patterns. */
8601 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8607 if (TARGET_VXWORKS_RTP && flag_pic)
8609 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8610 xops[2] = gen_rtx_MEM (Pmode,
8611 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8612 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8614 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8615 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8616 an unadorned address. */
8617 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8618 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8619 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8623 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8628 /* We don't need a PIC base; we're not producing PIC code. */
8631 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8632 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8633 targetm.asm_out.internal_label (asm_out_file, "L",
8634 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8639 get_pc_thunk_name (name, REGNO (dest));
8640 pic_labels_used |= 1 << REGNO (dest);
8642 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8643 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8644 output_asm_insn ("call\t%X2", xops);
8647 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
8648 This is what will be referenced by the Mach-O PIC subsystem. */
8649 if (machopic_should_output_picbase_label () || !label)
8650 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8652 /* When we are restoring the pic base at the site of a nonlocal label,
8653 and we decided to emit the pic base above, we will still output a
8654 local label used for calculating the correction offset (even though
8655 the offset will be 0 in that case). */
8657 targetm.asm_out.internal_label (asm_out_file, "L",
8658 CODE_LABEL_NUMBER (label));
8663 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
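/* On a typical 32-bit ELF target the thunk-based sequence above
   therefore assembles to something like

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   leaving the GOT base in the destination register.  */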
8668 /* Generate a "push" pattern for input ARG. */
8673 struct machine_function *m = cfun->machine;
8675 if (m->fs.cfa_reg == stack_pointer_rtx)
8676 m->fs.cfa_offset += UNITS_PER_WORD;
8677 m->fs.sp_offset += UNITS_PER_WORD;
8679 return gen_rtx_SET (VOIDmode,
8681 gen_rtx_PRE_DEC (Pmode,
8682 stack_pointer_rtx)),
8686 /* Generate a "pop" pattern for input ARG. */
8691 return gen_rtx_SET (VOIDmode,
8694 gen_rtx_POST_INC (Pmode,
8695 stack_pointer_rtx)));
8698 /* Return >= 0 if there is an unused call-clobbered register available
8699 for the entire function. */
8702 ix86_select_alt_pic_regnum (void)
8704 if (current_function_is_leaf
8706 && !ix86_current_function_calls_tls_descriptor)
8709 /* Can't use the same register for both PIC and DRAP. */
8711 drap = REGNO (crtl->drap_reg);
8714 for (i = 2; i >= 0; --i)
8715 if (i != drap && !df_regs_ever_live_p (i))
8719 return INVALID_REGNUM;
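/* For reference, the loop above scans hard regnos 2, 1, 0, which
   with the usual i386 numbering (AX_REG == 0, DX_REG == 1,
   CX_REG == 2) are %ecx, %edx and %eax: the call-clobbered
   registers a leaf function can reuse freely.  */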
8722 /* Return TRUE if we need to save REGNO. */
8725 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8727 if (pic_offset_table_rtx
8728 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8729 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8731 || crtl->calls_eh_return
8732 || crtl->uses_const_pool
8733 || cfun->has_nonlocal_label))
8734 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8736 if (crtl->calls_eh_return && maybe_eh_return)
8741 unsigned test = EH_RETURN_DATA_REGNO (i);
8742 if (test == INVALID_REGNUM)
8749 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8752 return (df_regs_ever_live_p (regno)
8753 && !call_used_regs[regno]
8754 && !fixed_regs[regno]
8755 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8758 /* Return the number of saved general purpose registers. */
8761 ix86_nsaved_regs (void)
8766 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8767 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8772 /* Return the number of saved SSE registers. */
8775 ix86_nsaved_sseregs (void)
8780 if (!TARGET_64BIT_MS_ABI)
8782 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8783 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8788 /* Given FROM and TO register numbers, say whether this elimination is
8789 allowed. If stack alignment is needed, we can only replace argument
8790 pointer with hard frame pointer, or replace frame pointer with stack
8791 pointer. Otherwise, frame pointer elimination is automatically
8792 handled and all other eliminations are valid. */
8795 ix86_can_eliminate (const int from, const int to)
8797 if (stack_realign_fp)
8798 return ((from == ARG_POINTER_REGNUM
8799 && to == HARD_FRAME_POINTER_REGNUM)
8800 || (from == FRAME_POINTER_REGNUM
8801 && to == STACK_POINTER_REGNUM));
8803 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8806 /* Return the offset between two registers, one to be eliminated, and the other
8807 its replacement, at the start of a routine. */
8810 ix86_initial_elimination_offset (int from, int to)
8812 struct ix86_frame frame;
8813 ix86_compute_frame_layout (&frame);
8815 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8816 return frame.hard_frame_pointer_offset;
8817 else if (from == FRAME_POINTER_REGNUM
8818 && to == HARD_FRAME_POINTER_REGNUM)
8819 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8822 gcc_assert (to == STACK_POINTER_REGNUM);
8824 if (from == ARG_POINTER_REGNUM)
8825 return frame.stack_pointer_offset;
8827 gcc_assert (from == FRAME_POINTER_REGNUM);
8828 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8832 /* In a dynamically-aligned function, we can't know the offset from
8833 stack pointer to frame pointer, so we must ensure that setjmp
8834 eliminates fp against the hard fp (%ebp) rather than trying to
8835 index from %esp up to the top of the frame across a gap that is
8836 of unknown (at compile-time) size. */
8838 ix86_builtin_setjmp_frame_value (void)
8840 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8843 /* When using -fsplit-stack, the allocation routines set a field in
8844 the TCB to the bottom of the stack plus this much space, measured in bytes. */
8847 #define SPLIT_STACK_AVAILABLE 256
8849 /* Fill the ix86_frame structure describing the frame of the current function. */
8852 ix86_compute_frame_layout (struct ix86_frame *frame)
8854 unsigned int stack_alignment_needed;
8855 HOST_WIDE_INT offset;
8856 unsigned int preferred_alignment;
8857 HOST_WIDE_INT size = get_frame_size ();
8858 HOST_WIDE_INT to_allocate;
8860 frame->nregs = ix86_nsaved_regs ();
8861 frame->nsseregs = ix86_nsaved_sseregs ();
8863 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8864 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8866 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
8867 except for function prologues and leaf functions. */
8868 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8869 && (!current_function_is_leaf || cfun->calls_alloca != 0
8870 || ix86_current_function_calls_tls_descriptor))
8872 preferred_alignment = 16;
8873 stack_alignment_needed = 16;
8874 crtl->preferred_stack_boundary = 128;
8875 crtl->stack_alignment_needed = 128;
8878 gcc_assert (!size || stack_alignment_needed);
8879 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8880 gcc_assert (preferred_alignment <= stack_alignment_needed);
8882 /* For SEH we have to limit the amount of code movement into the prologue.
8883 At present we do this via a BLOCKAGE, at which point there's very little
8884 scheduling that can be done, which means that there's very little point
8885 in doing anything except PUSHs. */
8887 cfun->machine->use_fast_prologue_epilogue = false;
8889 /* During the reload iteration the number of registers saved can change.
8890 Recompute the value as needed.  Do not recompute when the number of
8891 registers didn't change, as reload makes multiple calls to this function
8892 and does not expect the decision to change within a single iteration. */
8893 else if (!optimize_function_for_size_p (cfun)
8894 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8896 int count = frame->nregs;
8897 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8899 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8901 /* The fast prologue uses moves instead of pushes to save registers.  This
8902 is significantly longer, but also executes faster, as modern hardware
8903 can execute the moves in parallel but cannot do so for push/pop.
8905 Be careful about choosing which prologue to emit: when the function
8906 takes many instructions to execute, we may as well use the slow
8907 version, likewise when the function is known to be outside a hot spot
8908 (known only with profile feedback).  Weight the size of the function
8909 by the number of registers to save, as it is cheap to use one or two
8910 push instructions but very slow to use many of them. */
8912 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8913 if (node->frequency < NODE_FREQUENCY_NORMAL
8914 || (flag_branch_probabilities
8915 && node->frequency < NODE_FREQUENCY_HOT))
8916 cfun->machine->use_fast_prologue_epilogue = false;
8918 cfun->machine->use_fast_prologue_epilogue
8919 = !expensive_function_p (count);
8922 frame->save_regs_using_mov
8923 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
8924 /* If static stack checking is enabled and done with probes,
8925 the registers need to be saved before allocating the frame. */
8926 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
8928 /* Skip return address. */
8929 offset = UNITS_PER_WORD;
8931 /* Skip pushed static chain. */
8932 if (ix86_static_chain_on_stack)
8933 offset += UNITS_PER_WORD;
8935 /* Skip saved base pointer. */
8936 if (frame_pointer_needed)
8937 offset += UNITS_PER_WORD;
8938 frame->hfp_save_offset = offset;
8940 /* The traditional frame pointer location is at the top of the frame. */
8941 frame->hard_frame_pointer_offset = offset;
8943 /* Register save area */
8944 offset += frame->nregs * UNITS_PER_WORD;
8945 frame->reg_save_offset = offset;
8947 /* On SEH targets, registers are pushed just before the frame pointer location. */
8950 frame->hard_frame_pointer_offset = offset;
8952 /* Align and set SSE register save area. */
8953 if (frame->nsseregs)
8955 /* The only ABI that has saved SSE registers (Win64) also has a
8956 16-byte aligned default stack, and thus we don't need to be
8957 within the re-aligned local stack frame to save them. */
8958 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8959 offset = (offset + 16 - 1) & -16;
8960 offset += frame->nsseregs * 16;
8962 frame->sse_reg_save_offset = offset;
8964 /* The re-aligned stack starts here. Values before this point are not
8965 directly comparable with values below this point. In order to make
8966 sure that no value happens to be the same before and after, force
8967 the alignment computation below to add a non-zero value. */
8968 if (stack_realign_fp)
8969 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8972 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8973 offset += frame->va_arg_size;
8975 /* Align the start of the frame for the local function. */
8976 if (stack_realign_fp
8977 || offset != frame->sse_reg_save_offset
8979 || !current_function_is_leaf
8980 || cfun->calls_alloca
8981 || ix86_current_function_calls_tls_descriptor)
8982 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8984 /* Frame pointer points here. */
8985 frame->frame_pointer_offset = offset;
8989 /* Add the outgoing arguments area.  This can be skipped if we
8990 eliminated all the function calls as dead code.
8991 Skipping is however impossible when the function calls alloca, as the
8992 alloca expander assumes that the last crtl->outgoing_args_size bytes
8993 of the stack frame are unused. */
8994 if (ACCUMULATE_OUTGOING_ARGS
8995 && (!current_function_is_leaf || cfun->calls_alloca
8996 || ix86_current_function_calls_tls_descriptor))
8998 offset += crtl->outgoing_args_size;
8999 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9002 frame->outgoing_arguments_size = 0;
9004 /* Align the stack boundary.  Only needed if we're calling another function or using alloca. */
9006 if (!current_function_is_leaf || cfun->calls_alloca
9007 || ix86_current_function_calls_tls_descriptor)
9008 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9010 /* We've reached end of stack frame. */
9011 frame->stack_pointer_offset = offset;
9013 /* The size the prologue needs to allocate. */
9014 to_allocate = offset - frame->sse_reg_save_offset;
9016 if ((!to_allocate && frame->nregs <= 1)
9017 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9018 frame->save_regs_using_mov = false;
9020 if (ix86_using_red_zone ()
9021 && current_function_sp_is_unchanging
9022 && current_function_is_leaf
9023 && !ix86_current_function_calls_tls_descriptor)
9025 frame->red_zone_size = to_allocate;
9026 if (frame->save_regs_using_mov)
9027 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9028 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9029 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9032 frame->red_zone_size = 0;
9033 frame->stack_pointer_offset -= frame->red_zone_size;
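/* (Under the SysV x86-64 ABI the red zone is the 128 bytes below
   the stack pointer that signal and interrupt handlers must not
   clobber, so a leaf function whose frame fits there can skip the
   stack adjustment entirely.)  */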
9035 /* The SEH frame pointer location is near the bottom of the frame.
9036 This is enforced by the fact that the difference between the
9037 stack pointer and the frame pointer is limited to 240 bytes in
9038 the unwind data structure. */
9043 /* If we can leave the frame pointer where it is, do so. Also, returns
9044 the establisher frame for __builtin_frame_address (0). */
9045 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9046 if (diff <= SEH_MAX_FRAME_SIZE
9047 && (diff > 240 || (diff & 15) != 0)
9048 && !crtl->accesses_prior_frames)
9050 /* Ideally we'd determine what portion of the local stack frame
9051 (within the constraint of the lowest 240) is most heavily used.
9052 But without that complication, simply bias the frame pointer
9053 by 128 bytes so as to maximize the amount of the local stack
9054 frame that is addressable with 8-bit offsets. */
9055 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
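/* To recap, the offsets computed above describe a frame of roughly
   this shape, growing downward from the CFA (not every special case
   is shown):

	[return address]		offset 0
	[saved frame pointer]		hard_frame_pointer_offset
	[saved general registers]	reg_save_offset
	[saved SSE registers]		sse_reg_save_offset (16-byte aligned)
	[va_arg register save area]
					frame_pointer_offset
	[local variables]
	[outgoing arguments]		stack_pointer_offset  */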
9060 /* This is semi-inlined memory_address_length, but simplified
9061 since we know that we're always dealing with reg+offset, and
9062 to avoid having to create and discard all that rtl. */
9065 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9071 /* EBP and R13 cannot be encoded without an offset. */
9072 len = (regno == BP_REG || regno == R13_REG);
9074 else if (IN_RANGE (offset, -128, 127))
9077 /* ESP and R12 must be encoded with a SIB byte. */
9078 if (regno == SP_REG || regno == R12_REG)
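/* Worked examples, given the length rules above: (%eax) needs no
   extra bytes, (%ebp) needs a disp8 even at offset 0, 16(%eax)
   needs a disp8, 1024(%eax) needs a disp32 (4 bytes), and any
   %esp- or %r12-based address pays one more byte for the SIB.  */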
9084 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9085 The valid base registers are taken from CFUN->MACHINE->FS. */
9088 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9090 const struct machine_function *m = cfun->machine;
9091 rtx base_reg = NULL;
9092 HOST_WIDE_INT base_offset = 0;
9094 if (m->use_fast_prologue_epilogue)
9096 /* Choose the base register most likely to allow the most scheduling
9097 opportunities.  Generally FP is valid throughout the function,
9098 while DRAP must be reloaded within the epilogue. But choose either
9099 over the SP due to increased encoding size. */
9103 base_reg = hard_frame_pointer_rtx;
9104 base_offset = m->fs.fp_offset - cfa_offset;
9106 else if (m->fs.drap_valid)
9108 base_reg = crtl->drap_reg;
9109 base_offset = 0 - cfa_offset;
9111 else if (m->fs.sp_valid)
9113 base_reg = stack_pointer_rtx;
9114 base_offset = m->fs.sp_offset - cfa_offset;
9119 HOST_WIDE_INT toffset;
9122 /* Choose the base register with the smallest address encoding.
9123 With a tie, choose FP > DRAP > SP. */
9126 base_reg = stack_pointer_rtx;
9127 base_offset = m->fs.sp_offset - cfa_offset;
9128 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9130 if (m->fs.drap_valid)
9132 toffset = 0 - cfa_offset;
9133 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9136 base_reg = crtl->drap_reg;
9137 base_offset = toffset;
9143 toffset = m->fs.fp_offset - cfa_offset;
9144 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9147 base_reg = hard_frame_pointer_rtx;
9148 base_offset = toffset;
9153 gcc_assert (base_reg != NULL);
9155 return plus_constant (base_reg, base_offset);
9158 /* Emit code to save registers in the prologue. */
9161 ix86_emit_save_regs (void)
9166 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9167 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9169 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9170 RTX_FRAME_RELATED_P (insn) = 1;
9174 /* Emit a single register save at CFA - CFA_OFFSET. */
9177 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9178 HOST_WIDE_INT cfa_offset)
9180 struct machine_function *m = cfun->machine;
9181 rtx reg = gen_rtx_REG (mode, regno);
9182 rtx mem, addr, base, insn;
9184 addr = choose_baseaddr (cfa_offset);
9185 mem = gen_frame_mem (mode, addr);
9187 /* For SSE saves, we need to indicate the 128-bit alignment. */
9188 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9190 insn = emit_move_insn (mem, reg);
9191 RTX_FRAME_RELATED_P (insn) = 1;
9194 if (GET_CODE (base) == PLUS)
9195 base = XEXP (base, 0);
9196 gcc_checking_assert (REG_P (base));
9198 /* When saving registers into a re-aligned local stack frame, avoid
9199 any tricky guessing by dwarf2out. */
9200 if (m->fs.realigned)
9202 gcc_checking_assert (stack_realign_drap);
9204 if (regno == REGNO (crtl->drap_reg))
9206 /* A bit of a hack. We force the DRAP register to be saved in
9207 the re-aligned stack frame, which provides us with a copy
9208 of the CFA that will last past the prologue. Install it. */
9209 gcc_checking_assert (cfun->machine->fs.fp_valid);
9210 addr = plus_constant (hard_frame_pointer_rtx,
9211 cfun->machine->fs.fp_offset - cfa_offset);
9212 mem = gen_rtx_MEM (mode, addr);
9213 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9217 /* The frame pointer is a stable reference within the
9218 aligned frame. Use it. */
9219 gcc_checking_assert (cfun->machine->fs.fp_valid);
9220 addr = plus_constant (hard_frame_pointer_rtx,
9221 cfun->machine->fs.fp_offset - cfa_offset);
9222 mem = gen_rtx_MEM (mode, addr);
9223 add_reg_note (insn, REG_CFA_EXPRESSION,
9224 gen_rtx_SET (VOIDmode, mem, reg));
9228 /* The memory may not be relative to the current CFA register,
9229 which means that we may need to generate a new pattern for
9230 use by the unwind info. */
9231 else if (base != m->fs.cfa_reg)
9233 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9234 mem = gen_rtx_MEM (mode, addr);
9235 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9239 /* Emit code to save registers using MOV insns.
9240 First register is stored at CFA - CFA_OFFSET. */
9242 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9246 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9247 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9249 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9250 cfa_offset -= UNITS_PER_WORD;
9254 /* Emit code to save SSE registers using MOV insns.
9255 First register is stored at CFA - CFA_OFFSET. */
9257 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9261 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9262 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9264 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9269 static GTY(()) rtx queued_cfa_restores;
9271 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next
9272 stack manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
9273 Don't add the note if the previously saved value will be left untouched
9274 within the stack red zone till return, as unwinders can find the same
9275 value in the register and on the stack. */
9278 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9280 if (!crtl->shrink_wrapped
9281 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9286 add_reg_note (insn, REG_CFA_RESTORE, reg);
9287 RTX_FRAME_RELATED_P (insn) = 1;
9291 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9294 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9297 ix86_add_queued_cfa_restore_notes (rtx insn)
9300 if (!queued_cfa_restores)
9302 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9304 XEXP (last, 1) = REG_NOTES (insn);
9305 REG_NOTES (insn) = queued_cfa_restores;
9306 queued_cfa_restores = NULL_RTX;
9307 RTX_FRAME_RELATED_P (insn) = 1;
9310 /* Expand a prologue or epilogue stack adjustment.
9311 The pattern exists to put a dependency on all ebp-based memory accesses.
9312 STYLE should be negative if instructions should be marked as frame related,
9313 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
9317 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9318 int style, bool set_cfa)
9320 struct machine_function *m = cfun->machine;
9322 bool add_frame_related_expr = false;
9325 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9326 else if (x86_64_immediate_operand (offset, DImode))
9327 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9331 /* r11 is used by indirect sibcall return as well, set before the
9332 epilogue and used after the epilogue. */
9334 tmp = gen_rtx_REG (DImode, R11_REG);
9337 gcc_assert (src != hard_frame_pointer_rtx
9338 && dest != hard_frame_pointer_rtx);
9339 tmp = hard_frame_pointer_rtx;
9341 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9343 add_frame_related_expr = true;
9345 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9348 insn = emit_insn (insn);
9350 ix86_add_queued_cfa_restore_notes (insn);
9356 gcc_assert (m->fs.cfa_reg == src);
9357 m->fs.cfa_offset += INTVAL (offset);
9358 m->fs.cfa_reg = dest;
9360 r = gen_rtx_PLUS (Pmode, src, offset);
9361 r = gen_rtx_SET (VOIDmode, dest, r);
9362 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9363 RTX_FRAME_RELATED_P (insn) = 1;
9367 RTX_FRAME_RELATED_P (insn) = 1;
9368 if (add_frame_related_expr)
9370 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9371 r = gen_rtx_SET (VOIDmode, dest, r);
9372 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9376 if (dest == stack_pointer_rtx)
9378 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9379 bool valid = m->fs.sp_valid;
9381 if (src == hard_frame_pointer_rtx)
9383 valid = m->fs.fp_valid;
9384 ooffset = m->fs.fp_offset;
9386 else if (src == crtl->drap_reg)
9388 valid = m->fs.drap_valid;
9393 /* Else there are two possibilities: SP itself, which we set
9394 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9395 taken care of by hand along the eh_return path. */
9396 gcc_checking_assert (src == stack_pointer_rtx
9397 || offset == const0_rtx);
9400 m->fs.sp_offset = ooffset - INTVAL (offset);
9401 m->fs.sp_valid = valid;
9405 /* Find an available register to be used as the dynamic realign argument
9406 pointer register.  Such a register will be written in the prologue and
9407 used at the beginning of the body, so it must not be
9408 1. parameter passing register.
9410 We reuse static-chain register if it is available. Otherwise, we
9411 use DI for i386 and R13 for x86-64. We chose R13 since it has
9414 Return: the regno of chosen register. */
9417 find_drap_reg (void)
9419 tree decl = cfun->decl;
9423 /* Use R13 for a nested function or a function that needs a static
9424 chain.  Since a function with a tail call may use any caller-saved
9425 register in the epilogue, DRAP must not use a caller-saved
9426 register in that case. */
9427 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9434 /* Use DI for a nested function or a function that needs a static
9435 chain.  Since a function with a tail call may use any caller-saved
9436 register in the epilogue, DRAP must not use a caller-saved
9437 register in that case. */
9438 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9441 /* Reuse the static chain register if it isn't used for parameter passing. */
9443 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9445 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9446 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9453 /* Return minimum incoming stack alignment. */
9456 ix86_minimum_incoming_stack_boundary (bool sibcall)
9458 unsigned int incoming_stack_boundary;
9460 /* Prefer the one specified at command line. */
9461 if (ix86_user_incoming_stack_boundary)
9462 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9463 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9464 if -mstackrealign is used, this isn't a sibcall check, and the
9465 estimated stack alignment is 128 bits. */
9468 && ix86_force_align_arg_pointer
9469 && crtl->stack_alignment_estimated == 128)
9470 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9472 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9474 /* Incoming stack alignment can be changed on individual functions
9475 via force_align_arg_pointer attribute. We use the smallest
9476 incoming stack boundary. */
9477 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9478 && lookup_attribute (ix86_force_align_arg_pointer_string,
9479 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9480 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9482 /* The incoming stack frame has to be aligned at least at
9483 parm_stack_boundary. */
9484 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9485 incoming_stack_boundary = crtl->parm_stack_boundary;
9487 /* The stack at the entrance of main is aligned by the runtime.  We
9488 use the smallest incoming stack boundary. */
9489 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9490 && DECL_NAME (current_function_decl)
9491 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9492 && DECL_FILE_SCOPE_P (current_function_decl))
9493 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9495 return incoming_stack_boundary;
9498 /* Update incoming stack boundary and estimated stack alignment. */
9501 ix86_update_stack_boundary (void)
9503 ix86_incoming_stack_boundary
9504 = ix86_minimum_incoming_stack_boundary (false);
9506 /* x86_64 varargs need 16-byte stack alignment for the register save area. */
9510 && crtl->stack_alignment_estimated < 128)
9511 crtl->stack_alignment_estimated = 128;
9514 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9515 needed or an rtx for DRAP otherwise. */
9518 ix86_get_drap_rtx (void)
9520 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9521 crtl->need_drap = true;
9523 if (stack_realign_drap)
9525 /* Assign DRAP to vDRAP and return vDRAP. */
9526 unsigned int regno = find_drap_reg ();
9531 arg_ptr = gen_rtx_REG (Pmode, regno);
9532 crtl->drap_reg = arg_ptr;
9535 drap_vreg = copy_to_reg (arg_ptr);
9539 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9542 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9543 RTX_FRAME_RELATED_P (insn) = 1;
9551 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9554 ix86_internal_arg_pointer (void)
9556 return virtual_incoming_args_rtx;
9559 struct scratch_reg {
9564 /* Return a short-lived scratch register for use on function entry.
9565 In 32-bit mode, it is valid only after the registers are saved
9566 in the prologue. This register must be released by means of
9567 release_scratch_register_on_entry once it is dead. */
9570 get_scratch_register_on_entry (struct scratch_reg *sr)
9578 /* We always use R11 in 64-bit mode. */
9583 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9585 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9587 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9588 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9589 int regparm = ix86_function_regparm (fntype, decl);
9591 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9593 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9594 for the static chain register. */
9595 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9596 && drap_regno != AX_REG)
9598 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
9599 for the static chain register. */
9600 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
9602 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
9604 /* ecx is the static chain register. */
9605 else if (regparm < 3 && !fastcall_p && !thiscall_p
9607 && drap_regno != CX_REG)
9609 else if (ix86_save_reg (BX_REG, true))
9611 /* esi is the static chain register. */
9612 else if (!(regparm == 3 && static_chain_p)
9613 && ix86_save_reg (SI_REG, true))
9615 else if (ix86_save_reg (DI_REG, true))
9619 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9624 sr->reg = gen_rtx_REG (Pmode, regno);
9627 rtx insn = emit_insn (gen_push (sr->reg));
9628 RTX_FRAME_RELATED_P (insn) = 1;
9632 /* Release a scratch register obtained from the preceding function. */
9635 release_scratch_register_on_entry (struct scratch_reg *sr)
9639 struct machine_function *m = cfun->machine;
9640 rtx x, insn = emit_insn (gen_pop (sr->reg));
9642 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9643 RTX_FRAME_RELATED_P (insn) = 1;
9644 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9645 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9646 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9647 m->fs.sp_offset -= UNITS_PER_WORD;
9651 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
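/* With the default STACK_CHECK_PROBE_INTERVAL_EXP of 12 this is
   4096 bytes, i.e. one probe per page.  */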
9653 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9656 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9658 /* We skip the probe for the first interval + a small dope of 4 words and
9659 probe that many bytes past the specified size to maintain a protection
9660 area at the bottom of the stack. */
9661 const int dope = 4 * UNITS_PER_WORD;
9662 rtx size_rtx = GEN_INT (size), last;
9664 /* See if we have a constant small number of probes to generate. If so,
9665 that's the easy case. The run-time loop is made up of 11 insns in the
9666 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9667 for n # of intervals. */
9668 if (size <= 5 * PROBE_INTERVAL)
9670 HOST_WIDE_INT i, adjust;
9671 bool first_probe = true;
9673 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9674 values of N from 1 until it exceeds SIZE. If only one probe is
9675 needed, this will not generate any code. Then adjust and probe
9676 to PROBE_INTERVAL + SIZE. */
9677 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9681 adjust = 2 * PROBE_INTERVAL + dope;
9682 first_probe = false;
9685 adjust = PROBE_INTERVAL;
9687 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9688 plus_constant (stack_pointer_rtx, -adjust)));
9689 emit_stack_probe (stack_pointer_rtx);
9693 adjust = size + PROBE_INTERVAL + dope;
9695 adjust = size + PROBE_INTERVAL - i;
9697 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9698 plus_constant (stack_pointer_rtx, -adjust)));
9699 emit_stack_probe (stack_pointer_rtx);
9701 /* Adjust back to account for the additional first interval. */
9702 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9703 plus_constant (stack_pointer_rtx,
9704 PROBE_INTERVAL + dope)));
9707 /* Otherwise, do the same as above, but in a loop. Note that we must be
9708 extra careful with variables wrapping around because we might be at
9709 the very top (or the very bottom) of the address space and we have
9710 to be able to handle this case properly; in particular, we use an
9711 equality test for the loop condition. */
9714 HOST_WIDE_INT rounded_size;
9715 struct scratch_reg sr;
9717 get_scratch_register_on_entry (&sr);
9720 /* Step 1: round SIZE to the previous multiple of the interval. */
9722 rounded_size = size & -PROBE_INTERVAL;
9725 /* Step 2: compute initial and final value of the loop counter. */
9727 /* SP = SP_0 + PROBE_INTERVAL. */
9728 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9729 plus_constant (stack_pointer_rtx,
9730 - (PROBE_INTERVAL + dope))));
9732 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9733 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9734 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9735 gen_rtx_PLUS (Pmode, sr.reg,
9736 stack_pointer_rtx)));
9738 /* Step 3: the loop
9741 while (SP != LAST_ADDR)
9742 {
9743 SP = SP + PROBE_INTERVAL
9744 probe at SP
9745 }
9747 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9748 values of N from 1 until it is equal to ROUNDED_SIZE. */
9750 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9753 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9754 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9756 if (size != rounded_size)
9758 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9759 plus_constant (stack_pointer_rtx,
9760 rounded_size - size)));
9761 emit_stack_probe (stack_pointer_rtx);
9764 /* Adjust back to account for the additional first interval. */
9765 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9766 plus_constant (stack_pointer_rtx,
9767 PROBE_INTERVAL + dope)));
9769 release_scratch_register_on_entry (&sr);
9772 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9774 /* Even if the stack pointer isn't the CFA register, we need to correctly
9775 describe the adjustments made to it, in particular differentiate the
9776 frame-related ones from the frame-unrelated ones. */
9779 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9780 XVECEXP (expr, 0, 0)
9781 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9782 plus_constant (stack_pointer_rtx, -size));
9783 XVECEXP (expr, 0, 1)
9784 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9785 plus_constant (stack_pointer_rtx,
9786 PROBE_INTERVAL + dope + size));
9787 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9788 RTX_FRAME_RELATED_P (last) = 1;
9790 cfun->machine->fs.sp_offset += size;
9793 /* Make sure nothing is scheduled before we are done. */
9794 emit_insn (gen_blockage ());
9797 /* Adjust the stack pointer up to REG while probing it. */
9800 output_adjust_stack_and_probe (rtx reg)
9802 static int labelno = 0;
9803 char loop_lab[32], end_lab[32];
9806 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9807 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9809 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9811 /* Jump to END_LAB if SP == LAST_ADDR. */
9812 xops[0] = stack_pointer_rtx;
9814 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9815 fputs ("\tje\t", asm_out_file);
9816 assemble_name_raw (asm_out_file, end_lab);
9817 fputc ('\n', asm_out_file);
9819 /* SP = SP + PROBE_INTERVAL. */
9820 xops[1] = GEN_INT (PROBE_INTERVAL);
9821 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9824 xops[1] = const0_rtx;
9825 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9827 fprintf (asm_out_file, "\tjmp\t");
9828 assemble_name_raw (asm_out_file, loop_lab);
9829 fputc ('\n', asm_out_file);
9831 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
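/* E.g., on x86-64 with %r11 holding LAST_ADDR and the default
   4096-byte interval, the loop above comes out roughly as

	.LPSRL0: cmpq	%r11, %rsp
		 je	.LPSRE0
		 subq	$4096, %rsp
		 orq	$0, (%rsp)
		 jmp	.LPSRL0
	.LPSRE0:  */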
9836 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9837 inclusive. These are offsets from the current stack pointer. */
9840 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9842 /* See if we have a constant small number of probes to generate. If so,
9843 that's the easy case. The run-time loop is made up of 7 insns in the
9844 generic case while the compile-time loop is made up of n insns for n # of intervals. */
9846 if (size <= 7 * PROBE_INTERVAL)
9850 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9851 it exceeds SIZE. If only one probe is needed, this will not
9852 generate any code. Then probe at FIRST + SIZE. */
9853 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9854 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9856 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9859 /* Otherwise, do the same as above, but in a loop. Note that we must be
9860 extra careful with variables wrapping around because we might be at
9861 the very top (or the very bottom) of the address space and we have
9862 to be able to handle this case properly; in particular, we use an
9863 equality test for the loop condition. */
9866 HOST_WIDE_INT rounded_size, last;
9867 struct scratch_reg sr;
9869 get_scratch_register_on_entry (&sr);
9872 /* Step 1: round SIZE to the previous multiple of the interval. */
9874 rounded_size = size & -PROBE_INTERVAL;
9877 /* Step 2: compute initial and final value of the loop counter. */
9879 /* TEST_OFFSET = FIRST. */
9880 emit_move_insn (sr.reg, GEN_INT (-first));
9882 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9883 last = first + rounded_size;
9886 /* Step 3: the loop
9888 while (TEST_ADDR != LAST_ADDR)
9889 {
9890 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9891 probe at TEST_ADDR
9892 }
9894 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9895 until it is equal to ROUNDED_SIZE. */
9897 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9900 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9901 that SIZE is equal to ROUNDED_SIZE. */
9903 if (size != rounded_size)
9904 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9907 rounded_size - size));
9909 release_scratch_register_on_entry (&sr);
9912 /* Make sure nothing is scheduled before we are done. */
9913 emit_insn (gen_blockage ());
9916 /* Probe a range of stack addresses from REG to END, inclusive. These are
9917 offsets from the current stack pointer. */
9920 output_probe_stack_range (rtx reg, rtx end)
9922 static int labelno = 0;
9923 char loop_lab[32], end_lab[32];
9926 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9927 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9929 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9931 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9934 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9935 fputs ("\tje\t", asm_out_file);
9936 assemble_name_raw (asm_out_file, end_lab);
9937 fputc ('\n', asm_out_file);
9939 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9940 xops[1] = GEN_INT (PROBE_INTERVAL);
9941 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9943 /* Probe at TEST_ADDR. */
9944 xops[0] = stack_pointer_rtx;
9946 xops[2] = const0_rtx;
9947 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9949 fprintf (asm_out_file, "\tjmp\t");
9950 assemble_name_raw (asm_out_file, loop_lab);
9951 fputc ('\n', asm_out_file);
9953 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9958 /* Finalize the stack_realign_needed flag, which guides the prologue and
9959 epilogue so that they are generated in the correct form. */
9961 ix86_finalize_stack_realign_flags (void)
9963 /* Check if stack realignment is really needed after reload, and
9964 store the result in cfun. */
9965 unsigned int incoming_stack_boundary
9966 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9967 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9968 unsigned int stack_realign = (incoming_stack_boundary
9969 < (current_function_is_leaf
9970 ? crtl->max_used_stack_slot_alignment
9971 : crtl->stack_alignment_needed));
9973 if (crtl->stack_realign_finalized)
9975 /* After stack_realign_needed is finalized, we can no longer update it. */
9977 gcc_assert (crtl->stack_realign_needed == stack_realign);
9981 /* If the only reason for frame_pointer_needed is that we conservatively
9982 assumed stack realignment might be needed, but in the end nothing that
9983 needed the stack alignment had been spilled, clear frame_pointer_needed
9984 and say we don't need stack realignment. */
9987 && frame_pointer_needed
9988 && current_function_is_leaf
9989 && flag_omit_frame_pointer
9990 && current_function_sp_is_unchanging
9991 && !ix86_current_function_calls_tls_descriptor
9992 && !crtl->accesses_prior_frames
9993 && !cfun->calls_alloca
9994 && !crtl->calls_eh_return
9995 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
9996 && !ix86_frame_pointer_required ()
9997 && get_frame_size () == 0
9998 && ix86_nsaved_sseregs () == 0
9999 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10001 HARD_REG_SET set_up_by_prologue, prologue_used;
10004 CLEAR_HARD_REG_SET (prologue_used);
10005 CLEAR_HARD_REG_SET (set_up_by_prologue);
10006 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10007 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10008 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10009 HARD_FRAME_POINTER_REGNUM);
10013 FOR_BB_INSNS (bb, insn)
10014 if (NONDEBUG_INSN_P (insn)
10015 && requires_stack_frame_p (insn, prologue_used,
10016 set_up_by_prologue))
10018 crtl->stack_realign_needed = stack_realign;
10019 crtl->stack_realign_finalized = true;
10024 frame_pointer_needed = false;
10025 stack_realign = false;
10026 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10027 crtl->stack_alignment_needed = incoming_stack_boundary;
10028 crtl->stack_alignment_estimated = incoming_stack_boundary;
10029 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10030 crtl->preferred_stack_boundary = incoming_stack_boundary;
10031 df_finish_pass (true);
10032 df_scan_alloc (NULL);
10034 df_compute_regs_ever_live (true);
10038 crtl->stack_realign_needed = stack_realign;
10039 crtl->stack_realign_finalized = true;
10042 /* Expand the prologue into a bunch of separate insns. */
10045 ix86_expand_prologue (void)
10047 struct machine_function *m = cfun->machine;
10050 struct ix86_frame frame;
10051 HOST_WIDE_INT allocate;
10052 bool int_registers_saved;
10053 bool sse_registers_saved;
10055 ix86_finalize_stack_realign_flags ();
10057 /* DRAP should not coexist with stack_realign_fp. */
10058 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10060 memset (&m->fs, 0, sizeof (m->fs));
10062 /* Initialize CFA state for before the prologue. */
10063 m->fs.cfa_reg = stack_pointer_rtx;
10064 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10066 /* Track SP offset to the CFA. We continue tracking this after we've
10067 swapped the CFA register away from SP. In the case of re-alignment
10068 this is fudged; we're interested in offsets within the local frame. */
10069 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10070 m->fs.sp_valid = true;
10072 ix86_compute_frame_layout (&frame);
10074 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10076 /* We should have already generated an error for any use of
10077 ms_hook on a nested function. */
10078 gcc_checking_assert (!ix86_static_chain_on_stack);
10080 /* Check if profiling is active and whether we shall use the
10081 profiling-before-prologue variant.  If so, sorry. */
10082 if (crtl->profile && flag_fentry != 0)
10083 sorry ("ms_hook_prologue attribute isn%'t compatible "
10084 "with -mfentry for 32-bit");
10086 /* In ix86_asm_output_function_label we emitted:
10087 8b ff movl.s %edi,%edi
10089 8b ec movl.s %esp,%ebp
10091 This matches the hookable function prologue in Win32 API
10092 functions in Microsoft Windows XP Service Pack 2 and newer.
10093 Wine uses this to enable Windows apps to hook the Win32 API
10094 functions provided by Wine.
10096 What that means is that we've already set up the frame pointer. */
10098 if (frame_pointer_needed
10099 && !(crtl->drap_reg && crtl->stack_realign_needed))
10103 /* We've decided to use the frame pointer already set up.
10104 Describe this to the unwinder by pretending that both
10105 push and mov insns happen right here.
10107 Putting the unwind info here at the end of the ms_hook
10108 is done so that we can make absolutely certain we get
10109 the required byte sequence at the start of the function,
10110 rather than relying on an assembler that can produce
10111 the exact encoding required.
10113 However it does mean (in the unpatched case) that we have
10114 a 1 insn window where the asynchronous unwind info is
10115 incorrect. However, if we placed the unwind info at
10116 its correct location we would have incorrect unwind info
10117 in the patched case. Which is probably all moot since
10118 I don't expect Wine generates dwarf2 unwind info for the
10119 system libraries that use this feature. */
10121 insn = emit_insn (gen_blockage ());
10123 push = gen_push (hard_frame_pointer_rtx);
10124 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10125 stack_pointer_rtx);
10126 RTX_FRAME_RELATED_P (push) = 1;
10127 RTX_FRAME_RELATED_P (mov) = 1;
10129 RTX_FRAME_RELATED_P (insn) = 1;
10130 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10131 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10133 /* Note that gen_push incremented m->fs.cfa_offset, even
10134 though we didn't emit the push insn here. */
10135 m->fs.cfa_reg = hard_frame_pointer_rtx;
10136 m->fs.fp_offset = m->fs.cfa_offset;
10137 m->fs.fp_valid = true;
10141 /* The frame pointer is not needed so pop %ebp again.
10142 This leaves us with a pristine state. */
10143 emit_insn (gen_pop (hard_frame_pointer_rtx));
10147 /* The first insn of a function that accepts its static chain on the
10148 stack is to push the register that would be filled in by a direct
10149 call. This insn will be skipped by the trampoline. */
10150 else if (ix86_static_chain_on_stack)
10152 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10153 emit_insn (gen_blockage ());
10155 /* We don't want to interpret this push insn as a register save,
10156 only as a stack adjustment. The real copy of the register as
10157 a save will be done later, if needed. */
10158 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10159 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10160 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10161 RTX_FRAME_RELATED_P (insn) = 1;
/* Emit prologue code to adjust stack alignment and set up DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
10166 if (stack_realign_drap)
10168 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
/* Only need to push the parameter pointer reg if it is callee saved.  */
10171 if (!call_used_regs[REGNO (crtl->drap_reg)])
10173 /* Push arg pointer reg */
10174 insn = emit_insn (gen_push (crtl->drap_reg));
10175 RTX_FRAME_RELATED_P (insn) = 1;
10178 /* Grab the argument pointer. */
10179 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10180 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10181 RTX_FRAME_RELATED_P (insn) = 1;
10182 m->fs.cfa_reg = crtl->drap_reg;
10183 m->fs.cfa_offset = 0;
10185 /* Align the stack. */
10186 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10188 GEN_INT (-align_bytes)));
10189 RTX_FRAME_RELATED_P (insn) = 1;
/* Replicate the return address on the stack so that the return
	 address can be reached via the (argp - 1) slot.  This is needed
	 to implement macro RETURN_ADDR_RTX and intrinsic function
	 expand_builtin_return_addr etc.  */
10195 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10196 t = gen_frame_mem (Pmode, t);
10197 insn = emit_insn (gen_push (t));
10198 RTX_FRAME_RELATED_P (insn) = 1;
10200 /* For the purposes of frame and register save area addressing,
10201 we've started over with a new frame. */
10202 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10203 m->fs.realigned = true;
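      /* Illustrative sketch of the frame at this point (details vary
	 with the word size and saved registers):

	   incoming stack arguments
	   return address		<- drap_reg - UNITS_PER_WORD
	   [saved drap_reg]		(only if callee-saved)
	   alignment padding
	   copy of return address	<- aligned stack pointer

	 drap_reg now addresses the incoming argument area while the
	 stack pointer addresses the aligned local frame.  */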
10206 int_registers_saved = (frame.nregs == 0);
10207 sse_registers_saved = (frame.nsseregs == 0);
10209 if (frame_pointer_needed && !m->fs.fp_valid)
10211 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10212 slower on all targets. Also sdb doesn't like it. */
10213 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10214 RTX_FRAME_RELATED_P (insn) = 1;
/* Push registers now, before setting up the frame pointer.  */
10218 if (!int_registers_saved
10220 && !frame.save_regs_using_mov)
10222 ix86_emit_save_regs ();
10223 int_registers_saved = true;
10224 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10227 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10229 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10230 RTX_FRAME_RELATED_P (insn) = 1;
10232 if (m->fs.cfa_reg == stack_pointer_rtx)
10233 m->fs.cfa_reg = hard_frame_pointer_rtx;
10234 m->fs.fp_offset = m->fs.sp_offset;
10235 m->fs.fp_valid = true;
10239 if (!int_registers_saved)
10241 /* If saving registers via PUSH, do so now. */
10242 if (!frame.save_regs_using_mov)
10244 ix86_emit_save_regs ();
10245 int_registers_saved = true;
10246 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
/* When using the red zone we may start register saving before
	 allocating the stack frame, saving one cycle of the prologue.
10251 doing this if we have to probe the stack; at least on x86_64 the
10252 stack probe can turn into a call that clobbers a red zone location. */
10253 else if (ix86_using_red_zone ()
10254 && (! TARGET_STACK_PROBE
10255 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10257 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10258 int_registers_saved = true;
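  /* For example (illustrative, x86_64): with the 128-byte red zone the
     moves emitted above may store below the as-yet-unadjusted %rsp,
     e.g. "movq %rbx, -8(%rsp)", because the red zone guarantees those
     bytes are not clobbered by signal handlers.  */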
10262 if (stack_realign_fp)
10264 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10265 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10267 /* The computation of the size of the re-aligned stack frame means
10268 that we must allocate the size of the register save area before
10269 performing the actual alignment. Otherwise we cannot guarantee
10270 that there's enough storage above the realignment point. */
10271 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10272 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10273 GEN_INT (m->fs.sp_offset
10274 - frame.sse_reg_save_offset),
10277 /* Align the stack. */
10278 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10280 GEN_INT (-align_bytes)));
10282 /* For the purposes of register save area addressing, the stack
10283 pointer is no longer valid. As for the value of sp_offset,
10284 see ix86_compute_frame_layout, which we need to match in order
10285 to pass verification of stack_pointer_offset at the end. */
10286 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10287 m->fs.sp_valid = false;
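      /* Worked example (illustrative): with sp_offset == 44 and
	 align_bytes == 16, (44 + 16) & -16 == 48, the same rounded
	 value ix86_compute_frame_layout arrives at for the realigned
	 frame.  */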
10290 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10292 if (flag_stack_usage_info)
/* We start counting from ARG_POINTER.  */
10295 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10297 /* If it was realigned, take into account the fake frame. */
10298 if (stack_realign_drap)
10300 if (ix86_static_chain_on_stack)
10301 stack_size += UNITS_PER_WORD;
10303 if (!call_used_regs[REGNO (crtl->drap_reg)])
10304 stack_size += UNITS_PER_WORD;
10306 /* This over-estimates by 1 minimal-stack-alignment-unit but
10307 mitigates that by counting in the new return address slot. */
10308 current_function_dynamic_stack_size
10309 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10312 current_function_static_stack_size = stack_size;
10315 /* On SEH target with very large frame size, allocate an area to save
10316 SSE registers (as the very large allocation won't be described). */
  if (TARGET_SEH
      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10319 && !sse_registers_saved)
10321 HOST_WIDE_INT sse_size =
10322 frame.sse_reg_save_offset - frame.reg_save_offset;
10324 gcc_assert (int_registers_saved);
/* No need to do stack checking as the area will be immediately
	 written.  */
10328 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10329 GEN_INT (-sse_size), -1,
10330 m->fs.cfa_reg == stack_pointer_rtx);
10331 allocate -= sse_size;
10332 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10333 sse_registers_saved = true;
10336 /* The stack has already been decremented by the instruction calling us
10337 so probe if the size is non-negative to preserve the protection area. */
10338 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10340 /* We expect the registers to be saved when probes are used. */
10341 gcc_assert (int_registers_saved);
10343 if (STACK_CHECK_MOVING_SP)
10345 ix86_adjust_stack_and_probe (allocate);
10350 HOST_WIDE_INT size = allocate;
10352 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10353 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10355 if (TARGET_STACK_PROBE)
10356 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10358 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
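	  /* Either way each probe touches one word per page between the
	     protection area and the new stack bottom, so a guard page
	     cannot be skipped over (summary; the exact probe insn is
	     chosen by the helpers above).  */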
10364 else if (!ix86_target_stack_probe ()
10365 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10367 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10368 GEN_INT (-allocate), -1,
10369 m->fs.cfa_reg == stack_pointer_rtx);
10373 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10375 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10376 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
10377 bool eax_live = false;
10378 bool r10_live = false;
10381 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10382 if (!TARGET_64BIT_MS_ABI)
10383 eax_live = ix86_eax_live_at_start_p ();
10385 /* Note that SEH directives need to continue tracking the stack
10386 pointer even after the frame pointer has been set up. */
10389 insn = emit_insn (gen_push (eax));
10390 allocate -= UNITS_PER_WORD;
10391 if (sp_is_cfa_reg || TARGET_SEH)
10394 m->fs.cfa_offset += UNITS_PER_WORD;
10395 RTX_FRAME_RELATED_P (insn) = 1;
10401 r10 = gen_rtx_REG (Pmode, R10_REG);
10402 insn = emit_insn (gen_push (r10));
10403 allocate -= UNITS_PER_WORD;
10404 if (sp_is_cfa_reg || TARGET_SEH)
10407 m->fs.cfa_offset += UNITS_PER_WORD;
10408 RTX_FRAME_RELATED_P (insn) = 1;
10412 emit_move_insn (eax, GEN_INT (allocate));
10413 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10415 /* Use the fact that AX still contains ALLOCATE. */
10416 adjust_stack_insn = (TARGET_64BIT
10417 ? gen_pro_epilogue_adjust_stack_di_sub
10418 : gen_pro_epilogue_adjust_stack_si_sub);
10420 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10421 stack_pointer_rtx, eax));
10423 if (sp_is_cfa_reg || TARGET_SEH)
10426 m->fs.cfa_offset += allocate;
10427 RTX_FRAME_RELATED_P (insn) = 1;
10428 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10429 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10430 plus_constant (stack_pointer_rtx,
10433 m->fs.sp_offset += allocate;
10435 if (r10_live && eax_live)
10437 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
10438 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10439 t = plus_constant (t, UNITS_PER_WORD);
10440 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10442 else if (eax_live || r10_live)
10444 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
10445 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
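      /* The reloads above exploit the fact that eax still holds the
	 allocation size: after the subtraction the pushed copies sit
	 just above the newly allocated area, at (sp + eax) and one
	 word higher (illustrative; see the pushes before the
	 allocation).  */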
10448 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
/* If we haven't already set up the frame pointer, do so now.  */
10451 if (frame_pointer_needed && !m->fs.fp_valid)
10453 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10454 GEN_INT (frame.stack_pointer_offset
10455 - frame.hard_frame_pointer_offset));
10456 insn = emit_insn (insn);
10457 RTX_FRAME_RELATED_P (insn) = 1;
10458 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10460 if (m->fs.cfa_reg == stack_pointer_rtx)
10461 m->fs.cfa_reg = hard_frame_pointer_rtx;
10462 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10463 m->fs.fp_valid = true;
10466 if (!int_registers_saved)
10467 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10468 if (!sse_registers_saved)
10469 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10471 pic_reg_used = false;
10472 if (pic_offset_table_rtx
10473 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10476 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10478 if (alt_pic_reg_used != INVALID_REGNUM)
10479 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10481 pic_reg_used = true;
10488 if (ix86_cmodel == CM_LARGE_PIC)
10490 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10491 rtx label = gen_label_rtx ();
10492 emit_label (label);
10493 LABEL_PRESERVE_P (label) = 1;
10494 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10495 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10496 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10497 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10498 pic_offset_table_rtx, tmp_reg));
10501 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10505 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10506 RTX_FRAME_RELATED_P (insn) = 1;
10507 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10511 /* In the pic_reg_used case, make sure that the got load isn't deleted
10512 when mcount needs it. Blockage to avoid call movement across mcount
call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
10515 if (crtl->profile && !flag_fentry && pic_reg_used)
10516 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10518 if (crtl->drap_reg && !crtl->stack_realign_needed)
/* vDRAP is set up, but after reload it turns out that stack realignment
	 isn't necessary; here we emit the prologue to set up DRAP without
	 the stack-realignment adjustment.  */
10523 t = choose_baseaddr (0);
10524 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
/* Prevent instructions from being scheduled into the register save push
     sequence when access to the red zone area is done through the frame
     pointer.  The offset between the frame pointer and the stack pointer
     is calculated relative to the value of the stack pointer at the end
     of the function prologue, and moving instructions that access the
     red zone area via the frame pointer inside the push sequence violates
     this assumption.  */
10533 if (frame_pointer_needed && frame.red_zone_size)
10534 emit_insn (gen_memory_blockage ());
10536 /* Emit cld instruction if stringops are used in the function. */
10537 if (TARGET_CLD && ix86_current_function_needs_cld)
10538 emit_insn (gen_cld ());
10540 /* SEH requires that the prologue end within 256 bytes of the start of
10541 the function. Prevent instruction schedules that would extend that.
10542 Further, prevent alloca modifications to the stack pointer from being
10543 combined with prologue modifications. */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
10548 /* Emit code to restore REG using a POP insn. */
10551 ix86_emit_restore_reg_using_pop (rtx reg)
10553 struct machine_function *m = cfun->machine;
10554 rtx insn = emit_insn (gen_pop (reg));
10556 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10557 m->fs.sp_offset -= UNITS_PER_WORD;
10559 if (m->fs.cfa_reg == crtl->drap_reg
10560 && REGNO (reg) == REGNO (crtl->drap_reg))
10562 /* Previously we'd represented the CFA as an expression
10563 like *(%ebp - 8). We've just popped that value from
10564 the stack, which means we need to reset the CFA to
10565 the drap register. This will remain until we restore
10566 the stack pointer. */
10567 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10568 RTX_FRAME_RELATED_P (insn) = 1;
10570 /* This means that the DRAP register is valid for addressing too. */
10571 m->fs.drap_valid = true;
10575 if (m->fs.cfa_reg == stack_pointer_rtx)
10577 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10578 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10579 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10580 RTX_FRAME_RELATED_P (insn) = 1;
10582 m->fs.cfa_offset -= UNITS_PER_WORD;
10585 /* When the frame pointer is the CFA, and we pop it, we are
10586 swapping back to the stack pointer as the CFA. This happens
10587 for stack frames that don't allocate other data, so we assume
10588 the stack pointer is now pointing at the return address, i.e.
10589 the function entry state, which makes the offset be 1 word. */
10590 if (reg == hard_frame_pointer_rtx)
10592 m->fs.fp_valid = false;
10593 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10595 m->fs.cfa_reg = stack_pointer_rtx;
10596 m->fs.cfa_offset -= UNITS_PER_WORD;
10598 add_reg_note (insn, REG_CFA_DEF_CFA,
10599 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10600 GEN_INT (m->fs.cfa_offset)));
10601 RTX_FRAME_RELATED_P (insn) = 1;
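  /* Worked example (32-bit, illustrative): with the CFA at %ebp + 8,
     popping %ebp leaves %esp pointing at the return address, so the
     CFA becomes %esp + 4 -- cfa_offset drops by one word, exactly
     what the note above records.  */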
10606 /* Emit code to restore saved registers using POP insns. */
10609 ix86_emit_restore_regs_using_pop (void)
10611 unsigned int regno;
10613 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10614 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10615 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10618 /* Emit code and notes for the LEAVE instruction. */
10621 ix86_emit_leave (void)
10623 struct machine_function *m = cfun->machine;
10624 rtx insn = emit_insn (ix86_gen_leave ());
10626 ix86_add_queued_cfa_restore_notes (insn);
10628 gcc_assert (m->fs.fp_valid);
10629 m->fs.sp_valid = true;
10630 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10631 m->fs.fp_valid = false;
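  /* leave is equivalent to "mov %ebp, %esp" followed by "pop %ebp"
     (likewise with the 64-bit registers), which is why the stack
     pointer becomes valid again one word above the frame pointer's
     save slot.  */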
10633 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10635 m->fs.cfa_reg = stack_pointer_rtx;
10636 m->fs.cfa_offset = m->fs.sp_offset;
10638 add_reg_note (insn, REG_CFA_DEF_CFA,
10639 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10640 RTX_FRAME_RELATED_P (insn) = 1;
ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     m->fs.fp_offset);
10646 /* Emit code to restore saved registers using MOV insns.
10647 First register is restored from CFA - CFA_OFFSET. */
10649 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10650 bool maybe_eh_return)
10652 struct machine_function *m = cfun->machine;
10653 unsigned int regno;
10655 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10656 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10658 rtx reg = gen_rtx_REG (Pmode, regno);
10661 mem = choose_baseaddr (cfa_offset);
10662 mem = gen_frame_mem (Pmode, mem);
10663 insn = emit_move_insn (reg, mem);
10665 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10667 /* Previously we'd represented the CFA as an expression
10668 like *(%ebp - 8). We've just popped that value from
10669 the stack, which means we need to reset the CFA to
10670 the drap register. This will remain until we restore
10671 the stack pointer. */
10672 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10673 RTX_FRAME_RELATED_P (insn) = 1;
10675 /* This means that the DRAP register is valid for addressing. */
10676 m->fs.drap_valid = true;
10679 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10681 cfa_offset -= UNITS_PER_WORD;
10685 /* Emit code to restore saved registers using MOV insns.
10686 First register is restored from CFA - CFA_OFFSET. */
10688 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10689 bool maybe_eh_return)
10691 unsigned int regno;
10693 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10694 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10696 rtx reg = gen_rtx_REG (V4SFmode, regno);
10699 mem = choose_baseaddr (cfa_offset);
10700 mem = gen_rtx_MEM (V4SFmode, mem);
10701 set_mem_align (mem, 128);
10702 emit_move_insn (reg, mem);
10704 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10710 /* Emit vzeroupper if needed. */
10713 ix86_maybe_emit_epilogue_vzeroupper (void)
10715 if (TARGET_VZEROUPPER
10716 && !TREE_THIS_VOLATILE (cfun->decl)
10717 && !cfun->machine->caller_return_avx256_p)
10718 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
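/* Background (illustrative): leaving the upper halves of the ymm
   registers dirty makes some processors pay a transition penalty on
   the first subsequent legacy-SSE instruction; vzeroupper clears them
   so callers that only use SSE do not pay it.  */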
10721 /* Restore function stack, frame, and registers. */
10724 ix86_expand_epilogue (int style)
10726 struct machine_function *m = cfun->machine;
10727 struct machine_frame_state frame_state_save = m->fs;
10728 struct ix86_frame frame;
10729 bool restore_regs_via_mov;
10732 ix86_finalize_stack_realign_flags ();
10733 ix86_compute_frame_layout (&frame);
10735 m->fs.sp_valid = (!frame_pointer_needed
10736 || (current_function_sp_is_unchanging
10737 && !stack_realign_fp));
10738 gcc_assert (!m->fs.sp_valid
10739 || m->fs.sp_offset == frame.stack_pointer_offset);
10741 /* The FP must be valid if the frame pointer is present. */
10742 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10743 gcc_assert (!m->fs.fp_valid
10744 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10746 /* We must have *some* valid pointer to the stack frame. */
10747 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10749 /* The DRAP is never valid at this point. */
10750 gcc_assert (!m->fs.drap_valid);
10752 /* See the comment about red zone and frame
10753 pointer usage in ix86_expand_prologue. */
10754 if (frame_pointer_needed && frame.red_zone_size)
10755 emit_insn (gen_memory_blockage ());
10757 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10758 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10760 /* Determine the CFA offset of the end of the red-zone. */
10761 m->fs.red_zone_offset = 0;
10762 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10764 /* The red-zone begins below the return address. */
10765 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
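      /* E.g. on x86_64 (illustrative): RED_ZONE_SIZE == 128 and
	 UNITS_PER_WORD == 8, so the red zone ends 136 bytes below
	 the CFA.  */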
10767 /* When the register save area is in the aligned portion of
10768 the stack, determine the maximum runtime displacement that
10769 matches up with the aligned frame. */
10770 if (stack_realign_drap)
10771 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10775 /* Special care must be taken for the normal return case of a function
10776 using eh_return: the eax and edx registers are marked as saved, but
10777 not restored along this path. Adjust the save location to match. */
10778 if (crtl->calls_eh_return && style != 2)
10779 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10781 /* EH_RETURN requires the use of moves to function properly. */
10782 if (crtl->calls_eh_return)
10783 restore_regs_via_mov = true;
10784 /* SEH requires the use of pops to identify the epilogue. */
10785 else if (TARGET_SEH)
10786 restore_regs_via_mov = false;
/* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
10790 else if (!m->fs.sp_valid && frame.nregs <= 1)
10791 restore_regs_via_mov = true;
10792 else if (TARGET_EPILOGUE_USING_MOVE
10793 && cfun->machine->use_fast_prologue_epilogue
10794 && (frame.nregs > 1
10795 || m->fs.sp_offset != frame.reg_save_offset))
10796 restore_regs_via_mov = true;
10797 else if (frame_pointer_needed
10799 && m->fs.sp_offset != frame.reg_save_offset)
10800 restore_regs_via_mov = true;
10801 else if (frame_pointer_needed
10802 && TARGET_USE_LEAVE
10803 && cfun->machine->use_fast_prologue_epilogue
10804 && frame.nregs == 1)
10805 restore_regs_via_mov = true;
10807 restore_regs_via_mov = false;
10809 if (restore_regs_via_mov || frame.nsseregs)
10811 /* Ensure that the entire register save area is addressable via
10812 the stack pointer, if we will restore via sp. */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
10815 && !(m->fs.fp_valid || m->fs.drap_valid)
10816 && (frame.nsseregs + frame.nregs) != 0)
10818 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10819 GEN_INT (m->fs.sp_offset
10820 - frame.sse_reg_save_offset),
10822 m->fs.cfa_reg == stack_pointer_rtx);
10826 /* If there are any SSE registers to restore, then we have to do it
10827 via moves, since there's obviously no pop for SSE regs. */
10828 if (frame.nsseregs)
10829 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10832 if (restore_regs_via_mov)
10837 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10839 /* eh_return epilogues need %ecx added to the stack pointer. */
10842 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10844 /* Stack align doesn't work with eh_return. */
10845 gcc_assert (!stack_realign_drap);
10846 /* Neither does regparm nested functions. */
10847 gcc_assert (!ix86_static_chain_on_stack);
10849 if (frame_pointer_needed)
10851 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10852 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10853 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10855 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10856 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10858 /* Note that we use SA as a temporary CFA, as the return
10859 address is at the proper place relative to it. We
10860 pretend this happens at the FP restore insn because
10861 prior to this insn the FP would be stored at the wrong
10862 offset relative to SA, and after this insn we have no
10863 other reasonable register to use for the CFA. We don't
10864 bother resetting the CFA to the SP for the duration of
10865 the return insn. */
10866 add_reg_note (insn, REG_CFA_DEF_CFA,
10867 plus_constant (sa, UNITS_PER_WORD));
10868 ix86_add_queued_cfa_restore_notes (insn);
10869 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10870 RTX_FRAME_RELATED_P (insn) = 1;
10872 m->fs.cfa_reg = sa;
10873 m->fs.cfa_offset = UNITS_PER_WORD;
10874 m->fs.fp_valid = false;
10876 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10877 const0_rtx, style, false);
10881 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10882 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10883 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10884 ix86_add_queued_cfa_restore_notes (insn);
10886 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10887 if (m->fs.cfa_offset != UNITS_PER_WORD)
10889 m->fs.cfa_offset = UNITS_PER_WORD;
10890 add_reg_note (insn, REG_CFA_DEF_CFA,
10891 plus_constant (stack_pointer_rtx,
10893 RTX_FRAME_RELATED_P (insn) = 1;
10896 m->fs.sp_offset = UNITS_PER_WORD;
10897 m->fs.sp_valid = true;
10902 /* SEH requires that the function end with (1) a stack adjustment
10903 if necessary, (2) a sequence of pops, and (3) a return or
10904 jump instruction. Prevent insns from the function body from
10905 being scheduled into this sequence. */
/* Prevent a catch region from being adjacent to the standard
	 epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda
	 nor several other flags that would be interesting to test are
	 set up yet.  */
10912 if (flag_non_call_exceptions)
10913 emit_insn (gen_nops (const1_rtx));
10915 emit_insn (gen_blockage ());
10918 /* First step is to deallocate the stack frame so that we can
pop the registers.  Also do it on SEH target for very large frames,
     as the emitted instructions aren't allowed by the ABI in
     epilogues.  */
10922 if (!m->fs.sp_valid
10924 && (m->fs.sp_offset - frame.reg_save_offset
10925 >= SEH_MAX_FRAME_SIZE)))
10927 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10928 GEN_INT (m->fs.fp_offset
10929 - frame.reg_save_offset),
10932 else if (m->fs.sp_offset != frame.reg_save_offset)
10934 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10935 GEN_INT (m->fs.sp_offset
10936 - frame.reg_save_offset),
10938 m->fs.cfa_reg == stack_pointer_rtx);
10941 ix86_emit_restore_regs_using_pop ();
/* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
10946 if (m->fs.fp_valid)
10948 /* If the stack pointer is valid and pointing at the frame
10949 pointer store address, then we only need a pop. */
10950 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10951 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10952 /* Leave results in shorter dependency chains on CPUs that are
10953 able to grok it fast. */
10954 else if (TARGET_USE_LEAVE
10955 || optimize_function_for_size_p (cfun)
10956 || !cfun->machine->use_fast_prologue_epilogue)
10957 ix86_emit_leave ();
10960 pro_epilogue_adjust_stack (stack_pointer_rtx,
10961 hard_frame_pointer_rtx,
10962 const0_rtx, style, !using_drap);
10963 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10969 int param_ptr_offset = UNITS_PER_WORD;
10972 gcc_assert (stack_realign_drap);
10974 if (ix86_static_chain_on_stack)
10975 param_ptr_offset += UNITS_PER_WORD;
10976 if (!call_used_regs[REGNO (crtl->drap_reg)])
10977 param_ptr_offset += UNITS_PER_WORD;
10979 insn = emit_insn (gen_rtx_SET
10980 (VOIDmode, stack_pointer_rtx,
10981 gen_rtx_PLUS (Pmode,
10983 GEN_INT (-param_ptr_offset))));
10984 m->fs.cfa_reg = stack_pointer_rtx;
10985 m->fs.cfa_offset = param_ptr_offset;
10986 m->fs.sp_offset = param_ptr_offset;
10987 m->fs.realigned = false;
10989 add_reg_note (insn, REG_CFA_DEF_CFA,
10990 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10991 GEN_INT (param_ptr_offset)));
10992 RTX_FRAME_RELATED_P (insn) = 1;
10994 if (!call_used_regs[REGNO (crtl->drap_reg)])
10995 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10998 /* At this point the stack pointer must be valid, and we must have
10999 restored all of the registers. We may not have deallocated the
11000 entire stack frame. We've delayed this until now because it may
11001 be possible to merge the local stack deallocation with the
11002 deallocation forced by ix86_static_chain_on_stack. */
11003 gcc_assert (m->fs.sp_valid);
11004 gcc_assert (!m->fs.fp_valid);
11005 gcc_assert (!m->fs.realigned);
11006 if (m->fs.sp_offset != UNITS_PER_WORD)
11008 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11009 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11013 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11015 /* Sibcall epilogues don't want a return instruction. */
11018 m->fs = frame_state_save;
11022 /* Emit vzeroupper if needed. */
11023 ix86_maybe_emit_epilogue_vzeroupper ();
11025 if (crtl->args.pops_args && crtl->args.size)
11027 rtx popc = GEN_INT (crtl->args.pops_args);
11029 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11030 address, do explicit add, and jump indirectly to the caller. */
11032 if (crtl->args.pops_args >= 65536)
11034 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11037 /* There is no "pascal" calling convention in any 64bit ABI. */
11038 gcc_assert (!TARGET_64BIT);
11040 insn = emit_insn (gen_pop (ecx));
11041 m->fs.cfa_offset -= UNITS_PER_WORD;
11042 m->fs.sp_offset -= UNITS_PER_WORD;
11044 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11045 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11046 add_reg_note (insn, REG_CFA_REGISTER,
11047 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11048 RTX_FRAME_RELATED_P (insn) = 1;
11050 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11052 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
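	  /* Note (illustrative): "ret $imm16" takes a 16-bit immediate,
	     so at most 65535 bytes can be popped by the return insn
	     itself; the pop/add/indirect-jump sequence above handles
	     anything larger.  */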
11055 emit_jump_insn (gen_simple_return_pop_internal (popc));
11058 emit_jump_insn (gen_simple_return_internal ());
11060 /* Restore the state back to the state from the prologue,
11061 so that it's correct for the next epilogue. */
11062 m->fs = frame_state_save;
11065 /* Reset from the function's potential modifications. */
11068 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11069 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11071 if (pic_offset_table_rtx)
11072 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11074 /* Mach-O doesn't support labels at the end of objects, so if
11075 it looks like we might want one, insert a NOP. */
11077 rtx insn = get_last_insn ();
11078 rtx deleted_debug_label = NULL_RTX;
11081 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
/* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL notes only;
	   instead set their CODE_LABEL_NUMBER to -1, otherwise there would
	   be code generation differences between -g and -g0.  */
11087 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11088 deleted_debug_label = insn;
11089 insn = PREV_INSN (insn);
11094 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11095 fputs ("\tnop\n", file);
11096 else if (deleted_debug_label)
11097 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11098 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11099 CODE_LABEL_NUMBER (insn) = -1;
11105 /* Return a scratch register to use in the split stack prologue. The
split-stack prologue is used for -fsplit-stack.  These are the first
   instructions in the function, even before the regular prologue.
11108 The scratch register can be any caller-saved register which is not
11109 used for parameters or for the static chain. */
11111 static unsigned int
11112 split_stack_prologue_scratch_regno (void)
11118 bool is_fastcall, is_thiscall;
11121 is_fastcall = (lookup_attribute ("fastcall",
11122 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11124 is_thiscall = (lookup_attribute ("thiscall",
11125 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11127 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11131 if (DECL_STATIC_CHAIN (cfun->decl))
11133 sorry ("-fsplit-stack does not support fastcall with "
11134 "nested function");
11135 return INVALID_REGNUM;
11139 else if (is_thiscall)
11141 if (!DECL_STATIC_CHAIN (cfun->decl))
11145 else if (regparm < 3)
11147 if (!DECL_STATIC_CHAIN (cfun->decl))
sorry ("-fsplit-stack does not support 2 register "
		 "parameters for a nested function");
11155 return INVALID_REGNUM;
11162 /* FIXME: We could make this work by pushing a register
11163 around the addition and comparison. */
11164 sorry ("-fsplit-stack does not support 3 register parameters");
11165 return INVALID_REGNUM;
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */
11173 static GTY(()) rtx split_stack_fn;
/* A SYMBOL_REF for the more-stack function when using the large
   model.  */
11178 static GTY(()) rtx split_stack_fn_large;
11180 /* Handle -fsplit-stack. These are the first instructions in the
11181 function, even before the regular prologue. */
11184 ix86_expand_split_stack_prologue (void)
11186 struct ix86_frame frame;
11187 HOST_WIDE_INT allocate;
11188 unsigned HOST_WIDE_INT args_size;
11189 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11190 rtx scratch_reg = NULL_RTX;
11191 rtx varargs_label = NULL_RTX;
11194 gcc_assert (flag_split_stack && reload_completed);
11196 ix86_finalize_stack_realign_flags ();
11197 ix86_compute_frame_layout (&frame);
11198 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11200 /* This is the label we will branch to if we have enough stack
11201 space. We expect the basic block reordering pass to reverse this
11202 branch if optimizing, so that we branch in the unlikely case. */
11203 label = gen_label_rtx ();
11205 /* We need to compare the stack pointer minus the frame size with
11206 the stack boundary in the TCB. The stack boundary always gives
11207 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11208 can compare directly. Otherwise we need to do an addition. */
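  /* On common glibc targets the comparison below comes out roughly as
     "cmp %fs:<boundary>, %rsp" in 64-bit mode (%gs in 32-bit mode),
     where <boundary> is the TCB slot the UNSPEC_STACK_CHECK address
     resolves to; the exact offset is libc-internal (illustrative).  */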
11210 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11211 UNSPEC_STACK_CHECK);
11212 limit = gen_rtx_CONST (Pmode, limit);
11213 limit = gen_rtx_MEM (Pmode, limit);
11214 if (allocate < SPLIT_STACK_AVAILABLE)
11215 current = stack_pointer_rtx;
11218 unsigned int scratch_regno;
11221 /* We need a scratch register to hold the stack pointer minus
11222 the required frame size. Since this is the very start of the
11223 function, the scratch register can be any caller-saved
11224 register which is not used for parameters. */
11225 offset = GEN_INT (- allocate);
11226 scratch_regno = split_stack_prologue_scratch_regno ();
11227 if (scratch_regno == INVALID_REGNUM)
11229 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11230 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11232 /* We don't use ix86_gen_add3 in this case because it will
11233 want to split to lea, but when not optimizing the insn
11234 will not be split after this point. */
11235 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11236 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11241 emit_move_insn (scratch_reg, offset);
11242 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11243 stack_pointer_rtx));
11245 current = scratch_reg;
11248 ix86_expand_branch (GEU, current, limit, label);
11249 jump_insn = get_last_insn ();
11250 JUMP_LABEL (jump_insn) = label;
11252 /* Mark the jump as very likely to be taken. */
11253 add_reg_note (jump_insn, REG_BR_PROB,
11254 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
11256 if (split_stack_fn == NULL_RTX)
11257 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11258 fn = split_stack_fn;
11260 /* Get more stack space. We pass in the desired stack space and the
11261 size of the arguments to copy to the new stack. In 32-bit mode
11262 we push the parameters; __morestack will return on a new stack
anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
11265 allocate_rtx = GEN_INT (allocate);
11266 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11267 call_fusage = NULL_RTX;
11272 reg10 = gen_rtx_REG (Pmode, R10_REG);
11273 reg11 = gen_rtx_REG (Pmode, R11_REG);
11275 /* If this function uses a static chain, it will be in %r10.
11276 Preserve it across the call to __morestack. */
11277 if (DECL_STATIC_CHAIN (cfun->decl))
11281 rax = gen_rtx_REG (Pmode, AX_REG);
11282 emit_move_insn (rax, reg10);
11283 use_reg (&call_fusage, rax);
11286 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11288 HOST_WIDE_INT argval;
11290 /* When using the large model we need to load the address
11291 into a register, and we've run out of registers. So we
11292 switch to a different calling convention, and we call a
different function: __morestack_large_model.  We pass the
11294 argument size in the upper 32 bits of r10 and pass the
11295 frame size in the lower 32 bits. */
11296 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11297 gcc_assert ((args_size & 0xffffffff) == args_size);
11299 if (split_stack_fn_large == NULL_RTX)
11300 split_stack_fn_large =
11301 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11303 if (ix86_cmodel == CM_LARGE_PIC)
11307 label = gen_label_rtx ();
11308 emit_label (label);
11309 LABEL_PRESERVE_P (label) = 1;
11310 emit_insn (gen_set_rip_rex64 (reg10, label));
11311 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11312 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11313 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11315 x = gen_rtx_CONST (Pmode, x);
11316 emit_move_insn (reg11, x);
11317 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11318 x = gen_const_mem (Pmode, x);
11319 emit_move_insn (reg11, x);
11322 emit_move_insn (reg11, split_stack_fn_large);
11326 argval = ((args_size << 16) << 16) + allocate;
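	  /* The double shift forms args_size << 32 without ever shifting
	     by the full width of HOST_WIDE_INT, which would be undefined
	     behavior if HOST_WIDE_INT were only 32 bits wide.  */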
11327 emit_move_insn (reg10, GEN_INT (argval));
11331 emit_move_insn (reg10, allocate_rtx);
11332 emit_move_insn (reg11, GEN_INT (args_size));
11333 use_reg (&call_fusage, reg11);
11336 use_reg (&call_fusage, reg10);
11340 emit_insn (gen_push (GEN_INT (args_size)));
11341 emit_insn (gen_push (allocate_rtx));
11343 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11344 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11346 add_function_usage_to (call_insn, call_fusage);
11348 /* In order to make call/return prediction work right, we now need
11349 to execute a return instruction. See
11350 libgcc/config/i386/morestack.S for the details on how this works.
11352 For flow purposes gcc must not see this as a return
11353 instruction--we need control flow to continue at the subsequent
11354 label. Therefore, we use an unspec. */
11355 gcc_assert (crtl->args.pops_args < 65536);
11356 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11358 /* If we are in 64-bit mode and this function uses a static chain,
we saved %r10 in %rax before calling __morestack.  */
11360 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11361 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11362 gen_rtx_REG (Pmode, AX_REG));
11364 /* If this function calls va_start, we need to store a pointer to
11365 the arguments on the old stack, because they may not have been
11366 all copied to the new stack. At this point the old stack can be
11367 found at the frame pointer value used by __morestack, because
11368 __morestack has set that up before calling back to us. Here we
11369 store that pointer in a scratch register, and in
ix86_expand_prologue we store the scratch register in a stack
     slot.  */
11372 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11374 unsigned int scratch_regno;
11378 scratch_regno = split_stack_prologue_scratch_regno ();
11379 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11380 frame_reg = gen_rtx_REG (Pmode, BP_REG);
      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
11398 words = TARGET_64BIT ? 3 : 5;
11399 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11400 gen_rtx_PLUS (Pmode, frame_reg,
11401 GEN_INT (words * UNITS_PER_WORD))));
11403 varargs_label = gen_label_rtx ();
11404 emit_jump_insn (gen_jump (varargs_label));
11405 JUMP_LABEL (get_last_insn ()) = varargs_label;
11410 emit_label (label);
11411 LABEL_NUSES (label) = 1;
11413 /* If this function calls va_start, we now have to set the scratch
11414 register for the case where we do not call __morestack. In this
11415 case we need to set it based on the stack pointer. */
11416 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11418 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11419 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11420 GEN_INT (UNITS_PER_WORD))));
11422 emit_label (varargs_label);
11423 LABEL_NUSES (varargs_label) = 1;
11427 /* We may have to tell the dataflow pass that the split stack prologue
11428 is initializing a scratch register. */
11431 ix86_live_on_entry (bitmap regs)
11433 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11435 gcc_assert (flag_split_stack);
11436 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11440 /* Extract the parts of an RTL expression that is a valid memory address
11441 for an instruction. Return 0 if the structure of the address is
grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of an lea
   instruction.  */
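/* For example (illustrative), the address "16(%ebx,%eax,4)" arrives
   here as
       (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx))
	     (const_int 16))
   and decomposes to base == %ebx, index == %eax, scale == 4 and
   disp == 16.  */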
11446 ix86_decompose_address (rtx addr, struct ix86_address *out)
11448 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11449 rtx base_reg, index_reg;
11450 HOST_WIDE_INT scale = 1;
11451 rtx scale_rtx = NULL_RTX;
11454 enum ix86_address_seg seg = SEG_DEFAULT;
/* Allow zero-extended SImode addresses;
     they will be emitted with the addr32 prefix.  */
11458 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11460 if (GET_CODE (addr) == ZERO_EXTEND
11461 && GET_MODE (XEXP (addr, 0)) == SImode)
11463 addr = XEXP (addr, 0);
11464 if (CONST_INT_P (addr))
11467 else if (GET_CODE (addr) == AND
11468 && const_32bit_mask (XEXP (addr, 1), DImode))
11470 addr = XEXP (addr, 0);
11472 /* Adjust SUBREGs. */
11473 if (GET_CODE (addr) == SUBREG
11474 && GET_MODE (SUBREG_REG (addr)) == SImode)
11476 addr = SUBREG_REG (addr);
11477 if (CONST_INT_P (addr))
11480 else if (GET_MODE (addr) == DImode)
11481 addr = gen_rtx_SUBREG (SImode, addr, 0);
11482 else if (GET_MODE (addr) != VOIDmode)
/* Allow SImode subregs of DImode addresses;
     they will be emitted with the addr32 prefix.  */
11489 if (TARGET_64BIT && GET_MODE (addr) == SImode)
11491 if (GET_CODE (addr) == SUBREG
11492 && GET_MODE (SUBREG_REG (addr)) == DImode)
11494 addr = SUBREG_REG (addr);
11495 if (CONST_INT_P (addr))
11502 else if (GET_CODE (addr) == SUBREG)
11504 if (REG_P (SUBREG_REG (addr)))
11509 else if (GET_CODE (addr) == PLUS)
11511 rtx addends[4], op;
11519 addends[n++] = XEXP (op, 1);
11522 while (GET_CODE (op) == PLUS);
11527 for (i = n; i >= 0; --i)
11530 switch (GET_CODE (op))
11535 index = XEXP (op, 0);
11536 scale_rtx = XEXP (op, 1);
11542 index = XEXP (op, 0);
11543 tmp = XEXP (op, 1);
11544 if (!CONST_INT_P (tmp))
11546 scale = INTVAL (tmp);
11547 if ((unsigned HOST_WIDE_INT) scale > 3)
11549 scale = 1 << scale;
11553 if (XINT (op, 1) == UNSPEC_TP
11554 && TARGET_TLS_DIRECT_SEG_REFS
11555 && seg == SEG_DEFAULT)
11556 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11562 if (!REG_P (SUBREG_REG (op)))
11589 else if (GET_CODE (addr) == MULT)
11591 index = XEXP (addr, 0); /* index*scale */
11592 scale_rtx = XEXP (addr, 1);
11594 else if (GET_CODE (addr) == ASHIFT)
11596 /* We're called for lea too, which implements ashift on occasion. */
11597 index = XEXP (addr, 0);
11598 tmp = XEXP (addr, 1);
11599 if (!CONST_INT_P (tmp))
11601 scale = INTVAL (tmp);
11602 if ((unsigned HOST_WIDE_INT) scale > 3)
11604 scale = 1 << scale;
11608 disp = addr; /* displacement */
11614 else if (GET_CODE (index) == SUBREG
11615 && REG_P (SUBREG_REG (index)))
11621 /* Extract the integral value of scale. */
11624 if (!CONST_INT_P (scale_rtx))
11626 scale = INTVAL (scale_rtx);
11629 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11630 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11632 /* Avoid useless 0 displacement. */
11633 if (disp == const0_rtx && (base || index))
/* Allow arg pointer and stack pointer as index if there is no scaling.  */
11637 if (base_reg && index_reg && scale == 1
11638 && (index_reg == arg_pointer_rtx
11639 || index_reg == frame_pointer_rtx
11640 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11643 tmp = base, base = index, index = tmp;
11644 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
/* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
11651 && (base_reg == hard_frame_pointer_rtx
11652 || base_reg == frame_pointer_rtx
11653 || base_reg == arg_pointer_rtx
11654 || (REG_P (base_reg)
11655 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11656 || REGNO (base_reg) == R13_REG))))
11659 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11660 Avoid this by transforming to [%esi+0].
11661 Reload calls address legitimization without cfun defined, so we need
11662 to test cfun for being non-NULL. */
11663 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11664 && base_reg && !index_reg && !disp
11665 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11668 /* Special case: encode reg+reg instead of reg*2. */
11669 if (!base && index && scale == 2)
11670 base = index, base_reg = index_reg, scale = 1;
11672 /* Special case: scaling cannot be encoded without base or displacement. */
11673 if (!base && !disp && index && scale != 1)
11677 out->index = index;
11679 out->scale = scale;
/* Return the cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
11691 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11693 struct ix86_address parts;
11695 int ok = ix86_decompose_address (x, &parts);
11699 if (parts.base && GET_CODE (parts.base) == SUBREG)
11700 parts.base = SUBREG_REG (parts.base);
11701 if (parts.index && GET_CODE (parts.index) == SUBREG)
11702 parts.index = SUBREG_REG (parts.index);
11704 /* Attempt to minimize number of registers in the address. */
11706 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11708 && (!REG_P (parts.index)
11709 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11713 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11715 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11716 && parts.base != parts.index)
/* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of such
     instructions and decoding degenerates to the vector decoder.
     Increase the cost of such addresses here.  The penalty is minimally
     2 cycles.  It may be worthwhile to split such addresses or even
     refuse them at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last cases may be avoidable by explicitly coding the
     zero in the memory address, but I don't have an AMD-K6 machine handy
     to check this theory.  */
11735 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11736 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11737 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
   this is used to form addresses to local data when -fPIC is in
   effect.  */
11748 darwin_local_data_pic (rtx disp)
11750 return (GET_CODE (disp) == UNSPEC
11751 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11754 /* Determine if a given RTX is a valid constant. We already know this
11755 satisfies CONSTANT_P. */
11758 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11760 switch (GET_CODE (x))
11765 if (GET_CODE (x) == PLUS)
11767 if (!CONST_INT_P (XEXP (x, 1)))
11772 if (TARGET_MACHO && darwin_local_data_pic (x))
11775 /* Only some unspecs are valid as "constants". */
11776 if (GET_CODE (x) == UNSPEC)
11777 switch (XINT (x, 1))
11780 case UNSPEC_GOTOFF:
11781 case UNSPEC_PLTOFF:
11782 return TARGET_64BIT;
11784 case UNSPEC_NTPOFF:
11785 x = XVECEXP (x, 0, 0);
11786 return (GET_CODE (x) == SYMBOL_REF
11787 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11788 case UNSPEC_DTPOFF:
11789 x = XVECEXP (x, 0, 0);
11790 return (GET_CODE (x) == SYMBOL_REF
11791 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11796 /* We must have drilled down to a symbol. */
11797 if (GET_CODE (x) == LABEL_REF)
11799 if (GET_CODE (x) != SYMBOL_REF)
11804 /* TLS symbols are never valid. */
11805 if (SYMBOL_REF_TLS_MODEL (x))
11808 /* DLLIMPORT symbols are never valid. */
11809 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11810 && SYMBOL_REF_DLLIMPORT_P (x))
11814 /* mdynamic-no-pic */
11815 if (MACHO_DYNAMIC_NO_PIC_P)
11816 return machopic_symbol_defined_p (x);
11821 if (GET_MODE (x) == TImode
11822 && x != CONST0_RTX (TImode)
11828 if (!standard_sse_constant_p (x))
11835 /* Otherwise we handle everything else in the move patterns. */
11839 /* Determine if it's legal to put X into the constant pool. This
11840 is not possible for the address of thread-local symbols, which
11841 is checked above. */
11844 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11846 /* We can always put integral constants and vectors in memory. */
11847 switch (GET_CODE (x))
11857 return !ix86_legitimate_constant_p (mode, x);
11861 /* Nonzero if the constant value X is a legitimate general operand
11862 when generating PIC code. It is given that flag_pic is on and
11863 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11866 legitimate_pic_operand_p (rtx x)
11870 switch (GET_CODE (x))
11873 inner = XEXP (x, 0);
11874 if (GET_CODE (inner) == PLUS
11875 && CONST_INT_P (XEXP (inner, 1)))
11876 inner = XEXP (inner, 0);
11878 /* Only some unspecs are valid as "constants". */
11879 if (GET_CODE (inner) == UNSPEC)
11880 switch (XINT (inner, 1))
11883 case UNSPEC_GOTOFF:
11884 case UNSPEC_PLTOFF:
11885 return TARGET_64BIT;
11887 x = XVECEXP (inner, 0, 0);
11888 return (GET_CODE (x) == SYMBOL_REF
11889 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11890 case UNSPEC_MACHOPIC_OFFSET:
11891 return legitimate_pic_address_disp_p (x);
11899 return legitimate_pic_address_disp_p (x);
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */
11910 legitimate_pic_address_disp_p (rtx disp)
11914 /* In 64bit mode we can allow direct addresses of symbols and labels
11915 when they are not dynamic symbols. */
11918 rtx op0 = disp, op1;
11920 switch (GET_CODE (disp))
11926 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11928 op0 = XEXP (XEXP (disp, 0), 0);
11929 op1 = XEXP (XEXP (disp, 0), 1);
11930 if (!CONST_INT_P (op1)
11931 || INTVAL (op1) >= 16*1024*1024
11932 || INTVAL (op1) < -16*1024*1024)
11934 if (GET_CODE (op0) == LABEL_REF)
11936 if (GET_CODE (op0) == CONST
11937 && GET_CODE (XEXP (op0, 0)) == UNSPEC
11938 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
11940 if (GET_CODE (op0) == UNSPEC
11941 && XINT (op0, 1) == UNSPEC_PCREL)
11943 if (GET_CODE (op0) != SYMBOL_REF)
11948 /* TLS references should always be enclosed in UNSPEC. */
11949 if (SYMBOL_REF_TLS_MODEL (op0))
11951 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11952 && ix86_cmodel != CM_LARGE_PIC)
11960 if (GET_CODE (disp) != CONST)
11962 disp = XEXP (disp, 0);
/* It is unsafe to allow PLUS expressions here; this limits the allowed
     distance of GOT table references.  We should not need these
     anyway.  */
11968 if (GET_CODE (disp) != UNSPEC
11969 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11970 && XINT (disp, 1) != UNSPEC_GOTOFF
11971 && XINT (disp, 1) != UNSPEC_PCREL
11972 && XINT (disp, 1) != UNSPEC_PLTOFF))
11975 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11976 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11982 if (GET_CODE (disp) == PLUS)
11984 if (!CONST_INT_P (XEXP (disp, 1)))
11986 disp = XEXP (disp, 0);
11990 if (TARGET_MACHO && darwin_local_data_pic (disp))
11993 if (GET_CODE (disp) != UNSPEC)
11996 switch (XINT (disp, 1))
12001 /* We need to check for both symbols and labels because VxWorks loads
text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
12004 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12005 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12006 case UNSPEC_GOTOFF:
/* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI specifies 32bit relocations as well, we don't produce
	 them in the small PIC model at all.  */
12010 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12011 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12013 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12015 case UNSPEC_GOTTPOFF:
12016 case UNSPEC_GOTNTPOFF:
12017 case UNSPEC_INDNTPOFF:
12020 disp = XVECEXP (disp, 0, 0);
12021 return (GET_CODE (disp) == SYMBOL_REF
12022 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12023 case UNSPEC_NTPOFF:
12024 disp = XVECEXP (disp, 0, 0);
12025 return (GET_CODE (disp) == SYMBOL_REF
12026 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12027 case UNSPEC_DTPOFF:
12028 disp = XVECEXP (disp, 0, 0);
12029 return (GET_CODE (disp) == SYMBOL_REF
12030 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12036 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12037 replace the input X, or the original X if no replacement is called for.
12038 The output parameter *WIN is 1 if the calling macro should goto WIN,
12039 0 if it should not. */
12042 ix86_legitimize_reload_address (rtx x,
12043 enum machine_mode mode ATTRIBUTE_UNUSED,
12044 int opnum, int type,
12045 int ind_levels ATTRIBUTE_UNUSED)
12047 /* Reload can generate:
12049 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
This RTX is rejected by ix86_legitimate_address_p due to
     non-strictness of base register 97.  Following this rejection,
     reload pushes all three components into separate registers,
     creating an invalid memory address RTX.

     The following code reloads only the invalid part of the
     memory address RTX.  */
12061 if (GET_CODE (x) == PLUS
12062 && REG_P (XEXP (x, 1))
12063 && GET_CODE (XEXP (x, 0)) == PLUS
12064 && REG_P (XEXP (XEXP (x, 0), 1)))
12067 bool something_reloaded = false;
12069 base = XEXP (XEXP (x, 0), 1);
12070 if (!REG_OK_FOR_BASE_STRICT_P (base))
12072 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12073 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12074 opnum, (enum reload_type)type);
12075 something_reloaded = true;
12078 index = XEXP (x, 1);
12079 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12081 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12082 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12083 opnum, (enum reload_type)type);
12084 something_reloaded = true;
12087 gcc_assert (something_reloaded);
/* Determine if OP is a suitable RTX for an address register.
   Return the naked register if a register or a register subreg is
   found, otherwise return NULL_RTX.  */
12099 ix86_validate_address_register (rtx op)
12101 enum machine_mode mode = GET_MODE (op);
12103 /* Only SImode or DImode registers can form the address. */
12104 if (mode != SImode && mode != DImode)
12109 else if (GET_CODE (op) == SUBREG)
12111 rtx reg = SUBREG_REG (op);
12116 mode = GET_MODE (reg);
12118 /* Don't allow SUBREGs that span more than a word. It can
12119 lead to spill failures when the register is one word out
12120 of a two word structure. */
12121 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12124 /* Allow only SUBREGs of non-eliminable hard registers. */
12125 if (register_no_elim_operand (reg, mode))
12129 /* Op is not a register. */
12133 /* Recognizes RTL expressions that are valid memory addresses for an
12134 instruction. The MODE argument is the machine mode for the MEM
12135 expression that wants to use this address.
It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS
   should convert common non-canonical forms to canonical form so that
   they will be recognized.  */
12142 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12143 rtx addr, bool strict)
12145 struct ix86_address parts;
12146 rtx base, index, disp;
12147 HOST_WIDE_INT scale;
12148 enum ix86_address_seg seg;
12150 if (ix86_decompose_address (addr, &parts) <= 0)
12151 /* Decomposition failed. */
12155 index = parts.index;
12157 scale = parts.scale;
12160 /* Validate base register. */
12163 rtx reg = ix86_validate_address_register (base);
12165 if (reg == NULL_RTX)
12168 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12169 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12170 /* Base is not valid. */
12174 /* Validate index register. */
12177 rtx reg = ix86_validate_address_register (index);
12179 if (reg == NULL_RTX)
12182 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12183 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12184 /* Index is not valid. */
12188 /* Index and base should have the same mode. */
12190 && GET_MODE (base) != GET_MODE (index))
12193 /* Address override works only on the (%reg) part of %fs:(%reg). */
12194 if (seg != SEG_DEFAULT
12195 && ((base && GET_MODE (base) != word_mode)
12196 || (index && GET_MODE (index) != word_mode)))
12199 /* Validate scale factor. */
12203 /* Scale without index. */
12206 if (scale != 2 && scale != 4 && scale != 8)
12207 /* Scale is not a valid multiplier. */
12211 /* Validate displacement. */
12214 if (GET_CODE (disp) == CONST
12215 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12216 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12217 switch (XINT (XEXP (disp, 0), 1))
/* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
	   used.  While the ABI specifies 32bit relocations as well, we don't
	   produce them at all and use IP-relative addressing instead.  */
12223 case UNSPEC_GOTOFF:
12224 gcc_assert (flag_pic);
12226 goto is_legitimate_pic;
12228 /* 64bit address unspec. */
12231 case UNSPEC_GOTPCREL:
12233 gcc_assert (flag_pic);
12234 goto is_legitimate_pic;
12236 case UNSPEC_GOTTPOFF:
12237 case UNSPEC_GOTNTPOFF:
12238 case UNSPEC_INDNTPOFF:
12239 case UNSPEC_NTPOFF:
12240 case UNSPEC_DTPOFF:
12243 case UNSPEC_STACK_CHECK:
12244 gcc_assert (flag_split_stack);
12248 /* Invalid address unspec. */
12252 else if (SYMBOLIC_CONST (disp)
12256 && MACHOPIC_INDIRECT
12257 && !machopic_operand_p (disp)
12263 if (TARGET_64BIT && (index || base))
12265 /* foo@dtpoff(%rX) is ok. */
12266 if (GET_CODE (disp) != CONST
12267 || GET_CODE (XEXP (disp, 0)) != PLUS
12268 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12269 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12270 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12271 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12272 /* Non-constant pic memory reference. */
12275 else if ((!TARGET_MACHO || flag_pic)
12276 && ! legitimate_pic_address_disp_p (disp))
12277 /* Displacement is an invalid pic construct. */
12280 else if (MACHO_DYNAMIC_NO_PIC_P
12281 && !ix86_legitimate_constant_p (Pmode, disp))
12282 /* Displacement must be referenced via non_lazy_pointer.  */
12286 /* This code used to verify that a symbolic pic displacement
12287 includes the pic_offset_table_rtx register.
12289      While this is a good idea, unfortunately these constructs may
12290      be created by the "adds using lea" optimization for incorrect code.
12299      Such code is nonsensical, but results in addressing the
12300      GOT table with a pic_offset_table_rtx base.  We can't
12301      just refuse it easily, since it gets matched by the
12302      "addsi3" pattern, which later gets split to lea in the
12303      case the output register differs from the input.  While this
12304      could be handled by a separate addsi pattern for this case
12305      that never results in lea, disabling this test seems to be the
12306      easier and correct fix for the crash.  */
12308 else if (GET_CODE (disp) != LABEL_REF
12309 && !CONST_INT_P (disp)
12310 && (GET_CODE (disp) != CONST
12311 || !ix86_legitimate_constant_p (Pmode, disp))
12312 && (GET_CODE (disp) != SYMBOL_REF
12313 || !ix86_legitimate_constant_p (Pmode, disp)))
12314 /* Displacement is not constant. */
12316 else if (TARGET_64BIT
12317 && !x86_64_immediate_operand (disp, VOIDmode))
12318 /* Displacement is out of range. */
12320   /* In x32 mode, constant addresses are sign-extended to 64 bits, so
12321      we have to reject addresses in the range 0x80000000 to 0xffffffff.  */
12322 else if (TARGET_X32 && !(index || base)
12323 && CONST_INT_P (disp)
12324 && val_signbit_known_set_p (SImode, INTVAL (disp)))
12328 /* Everything looks valid. */
12332 /* Determine if a given RTX is a valid constant address. */
12335 constant_address_p (rtx x)
12337 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12340 /* Return a unique alias set for the GOT. */
12342 static alias_set_type
12343 ix86_GOT_alias_set (void)
12345   static alias_set_type set = -1;
12346   if (set == -1)
12347     set = new_alias_set ();
12348   return set;
12351 /* Return a legitimate reference for ORIG (an address) using the
12352 register REG. If REG is 0, a new pseudo is generated.
12354 There are two types of references that must be handled:
12356 1. Global data references must load the address from the GOT, via
12357       the PIC reg.  An insn is emitted to do this load, and the reg is
            used in the address.
12360 2. Static data references, constant pool addresses, and code labels
12361 compute the address as an offset from the GOT, whose base is in
12362 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12363 differentiate them from global data objects. The returned
12364 address is the PIC reg + an unspec constant.
12366 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12367 reg also appears in the address. */
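/* A sketch for illustration (editor's example): on ia32 the two cases
   typically assemble as

       movl  foo@GOT(%ebx), %eax     # 1: load foo's address from the GOT
       leal  bar@GOTOFF(%ebx), %eax  # 2: PIC reg + constant offset

   where %ebx holds the PIC base, _GLOBAL_OFFSET_TABLE_.  */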
12370 legitimize_pic_address (rtx orig, rtx reg)
12373 rtx new_rtx = orig;
12376 if (TARGET_MACHO && !TARGET_64BIT)
12379 reg = gen_reg_rtx (Pmode);
12380 /* Use the generic Mach-O PIC machinery. */
12381 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12385 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12387 else if (TARGET_64BIT
12388 && ix86_cmodel != CM_SMALL_PIC
12389 && gotoff_operand (addr, Pmode))
12392 /* This symbol may be referenced via a displacement from the PIC
12393 base address (@GOTOFF). */
12395 if (reload_in_progress)
12396 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12397 if (GET_CODE (addr) == CONST)
12398 addr = XEXP (addr, 0);
12399 if (GET_CODE (addr) == PLUS)
12401 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12403 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12406 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12407 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12409 tmpreg = gen_reg_rtx (Pmode);
12412 emit_move_insn (tmpreg, new_rtx);
12416 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12417 tmpreg, 1, OPTAB_DIRECT);
12420 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12422 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12424 /* This symbol may be referenced via a displacement from the PIC
12425 base address (@GOTOFF). */
12427 if (reload_in_progress)
12428 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12429 if (GET_CODE (addr) == CONST)
12430 addr = XEXP (addr, 0);
12431 if (GET_CODE (addr) == PLUS)
12433 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12435 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12438 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12439 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12440 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12444 emit_move_insn (reg, new_rtx);
12448 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12449 /* We can't use @GOTOFF for text labels on VxWorks;
12450 see gotoff_operand. */
12451 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12453 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12455 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12456 return legitimize_dllimport_symbol (addr, true);
12457 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12458 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12459 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12461 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12462 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12466      /* For x64 PE-COFF there is no GOT table, so we use the address directly.  */
12468 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12470 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12471 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12474 reg = gen_reg_rtx (Pmode);
12475 emit_move_insn (reg, new_rtx);
12478 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12480 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12481 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12482 new_rtx = gen_const_mem (Pmode, new_rtx);
12483 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12486 reg = gen_reg_rtx (Pmode);
12487	  /* Use gen_movsi directly; otherwise the address is loaded
12488	     into a register for CSE.  We don't want to CSE these
12489	     addresses; instead we CSE addresses from the GOT table, so skip this.  */
12490 emit_insn (gen_movsi (reg, new_rtx));
12495 /* This symbol must be referenced via a load from the
12496 Global Offset Table (@GOT). */
12498 if (reload_in_progress)
12499 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12500 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12501 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12503 new_rtx = force_reg (Pmode, new_rtx);
12504 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12505 new_rtx = gen_const_mem (Pmode, new_rtx);
12506 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12509 reg = gen_reg_rtx (Pmode);
12510 emit_move_insn (reg, new_rtx);
12516 if (CONST_INT_P (addr)
12517 && !x86_64_immediate_operand (addr, VOIDmode))
12521 emit_move_insn (reg, addr);
12525 new_rtx = force_reg (Pmode, addr);
12527 else if (GET_CODE (addr) == CONST)
12529 addr = XEXP (addr, 0);
12531 /* We must match stuff we generate before. Assume the only
12532 unspecs that can get here are ours. Not that we could do
12533 anything with them anyway.... */
12534 if (GET_CODE (addr) == UNSPEC
12535 || (GET_CODE (addr) == PLUS
12536 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12538 gcc_assert (GET_CODE (addr) == PLUS);
12540 if (GET_CODE (addr) == PLUS)
12542 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12544 /* Check first to see if this is a constant offset from a @GOTOFF
12545 symbol reference. */
12546 if (gotoff_operand (op0, Pmode)
12547 && CONST_INT_P (op1))
12551 if (reload_in_progress)
12552 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12553 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12555 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12556 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12557 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12561 emit_move_insn (reg, new_rtx);
12567 if (INTVAL (op1) < -16*1024*1024
12568 || INTVAL (op1) >= 16*1024*1024)
12570 if (!x86_64_immediate_operand (op1, Pmode))
12571 op1 = force_reg (Pmode, op1);
12572 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12578 rtx base = legitimize_pic_address (op0, reg);
12579 enum machine_mode mode = GET_MODE (base);
12581 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
12583 if (CONST_INT_P (new_rtx))
12585 if (INTVAL (new_rtx) < -16*1024*1024
12586 || INTVAL (new_rtx) >= 16*1024*1024)
12588 if (!x86_64_immediate_operand (new_rtx, mode))
12589 new_rtx = force_reg (mode, new_rtx);
12591 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
12594 new_rtx = plus_constant (base, INTVAL (new_rtx));
12598 if (GET_CODE (new_rtx) == PLUS
12599 && CONSTANT_P (XEXP (new_rtx, 1)))
12601 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
12602 new_rtx = XEXP (new_rtx, 1);
12604 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
12612 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12615 get_thread_pointer (bool to_reg)
12617 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12619 if (GET_MODE (tp) != Pmode)
12620 tp = convert_to_mode (Pmode, tp, 1);
12622   if (to_reg)
12623     tp = copy_addr_to_reg (tp);
12625   return tp;
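/* Editor's illustration (an assumption about typical GNU/Linux targets,
   consistent with the segment choices elsewhere in this file): the
   UNSPEC_TP built above prints as a segment-relative access, %fs:0 in
   64-bit mode and %gs:0 in 32-bit mode.  */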
12628 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12630 static GTY(()) rtx ix86_tls_symbol;
12633 ix86_tls_get_addr (void)
12635 if (!ix86_tls_symbol)
12638 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12639 ? "___tls_get_addr" : "__tls_get_addr");
12641 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12644 return ix86_tls_symbol;
12647 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12649 static GTY(()) rtx ix86_tls_module_base_symbol;
12652 ix86_tls_module_base (void)
12654 if (!ix86_tls_module_base_symbol)
12656 ix86_tls_module_base_symbol
12657 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12659 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12660 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12663 return ix86_tls_module_base_symbol;
12666 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12667 false if we expect this to be used for a memory address and true if
12668 we expect to load the address into a register. */
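/* A sketch for illustration (editor's example of the well-known 64-bit
   ELF TLS sequences; the required padding prefixes and exact
   relocations are defined by the psABI and are omitted here):

       global dynamic:  leaq  x@tlsgd(%rip), %rdi
                        call  __tls_get_addr@PLT
       initial exec:    movq  x@gottpoff(%rip), %rax
                        movq  %fs:(%rax), %rax
       local exec:      movq  %fs:x@tpoff, %rax  */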
12671 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12673 rtx dest, base, off;
12674 rtx pic = NULL_RTX, tp = NULL_RTX;
12679 case TLS_MODEL_GLOBAL_DYNAMIC:
12680 dest = gen_reg_rtx (Pmode);
12685 pic = pic_offset_table_rtx;
12688 pic = gen_reg_rtx (Pmode);
12689 emit_insn (gen_set_got (pic));
12693 if (TARGET_GNU2_TLS)
12696 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12698 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12700 tp = get_thread_pointer (true);
12701 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12703 if (GET_MODE (x) != Pmode)
12704 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12706 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12710 rtx caddr = ix86_tls_get_addr ();
12714 rtx rax = gen_rtx_REG (Pmode, AX_REG);
12718 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12719 insns = get_insns ();
12722 if (GET_MODE (x) != Pmode)
12723 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12725 RTL_CONST_CALL_P (insns) = 1;
12726 emit_libcall_block (insns, dest, rax, x);
12729 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12733 case TLS_MODEL_LOCAL_DYNAMIC:
12734 base = gen_reg_rtx (Pmode);
12739 pic = pic_offset_table_rtx;
12742 pic = gen_reg_rtx (Pmode);
12743 emit_insn (gen_set_got (pic));
12747 if (TARGET_GNU2_TLS)
12749 rtx tmp = ix86_tls_module_base ();
12752 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12754 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12756 tp = get_thread_pointer (true);
12757 set_unique_reg_note (get_last_insn (), REG_EQUAL,
12758 gen_rtx_MINUS (Pmode, tmp, tp));
12762 rtx caddr = ix86_tls_get_addr ();
12766 rtx rax = gen_rtx_REG (Pmode, AX_REG);
12770 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12771 insns = get_insns ();
12774 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12775 share the LD_BASE result with other LD model accesses. */
12776 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12777 UNSPEC_TLS_LD_BASE);
12779 RTL_CONST_CALL_P (insns) = 1;
12780 emit_libcall_block (insns, base, rax, eqv);
12783 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12786 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12787 off = gen_rtx_CONST (Pmode, off);
12789 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12791 if (TARGET_GNU2_TLS)
12793 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12795 if (GET_MODE (x) != Pmode)
12796 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12798 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12802 case TLS_MODEL_INITIAL_EXEC:
12805 if (TARGET_SUN_TLS)
12807 /* The Sun linker took the AMD64 TLS spec literally
12808 and can only handle %rax as destination of the
12809 initial executable code sequence. */
12811 dest = gen_reg_rtx (Pmode);
12812 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12817 type = UNSPEC_GOTNTPOFF;
12821 if (reload_in_progress)
12822 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12823 pic = pic_offset_table_rtx;
12824 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12826 else if (!TARGET_ANY_GNU_TLS)
12828 pic = gen_reg_rtx (Pmode);
12829 emit_insn (gen_set_got (pic));
12830 type = UNSPEC_GOTTPOFF;
12835 type = UNSPEC_INDNTPOFF;
12838 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12839 off = gen_rtx_CONST (Pmode, off);
12841 off = gen_rtx_PLUS (Pmode, pic, off);
12842 off = gen_const_mem (Pmode, off);
12843 set_mem_alias_set (off, ix86_GOT_alias_set ());
12845 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12847 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12848 off = force_reg (Pmode, off);
12849 return gen_rtx_PLUS (Pmode, base, off);
12853 base = get_thread_pointer (true);
12854 dest = gen_reg_rtx (Pmode);
12855 emit_insn (gen_subsi3 (dest, base, off));
12859 case TLS_MODEL_LOCAL_EXEC:
12860 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12861 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12862 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12863 off = gen_rtx_CONST (Pmode, off);
12865 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12867 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12868 return gen_rtx_PLUS (Pmode, base, off);
12872 base = get_thread_pointer (true);
12873 dest = gen_reg_rtx (Pmode);
12874 emit_insn (gen_subsi3 (dest, base, off));
12879 gcc_unreachable ();
12885 /* Create or return the unique __imp_DECL dllimport symbol corresponding
      to DECL.  */
12888 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12889 htab_t dllimport_map;
12892 get_dllimport_decl (tree decl)
12894 struct tree_map *h, in;
12897 const char *prefix;
12898 size_t namelen, prefixlen;
12903 if (!dllimport_map)
12904 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12906 in.hash = htab_hash_pointer (decl);
12907 in.base.from = decl;
12908 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12909 h = (struct tree_map *) *loc;
12913 *loc = h = ggc_alloc_tree_map ();
12915 h->base.from = decl;
12916 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12917 VAR_DECL, NULL, ptr_type_node);
12918 DECL_ARTIFICIAL (to) = 1;
12919 DECL_IGNORED_P (to) = 1;
12920 DECL_EXTERNAL (to) = 1;
12921 TREE_READONLY (to) = 1;
12923 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12924 name = targetm.strip_name_encoding (name);
12925 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12926 ? "*__imp_" : "*__imp__";
12927 namelen = strlen (name);
12928 prefixlen = strlen (prefix);
12929 imp_name = (char *) alloca (namelen + prefixlen + 1);
12930 memcpy (imp_name, prefix, prefixlen);
12931 memcpy (imp_name + prefixlen, name, namelen + 1);
12933 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12934 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12935 SET_SYMBOL_REF_DECL (rtl, to);
12936 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12938 rtl = gen_const_mem (Pmode, rtl);
12939 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12941 SET_DECL_RTL (to, rtl);
12942 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
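/* Editor's illustration: for a dllimported "foo" on a target whose
   user_label_prefix is "_", the decl built above names the import
   pointer "__imp__foo" (the leading '*' suppresses further prefixing),
   and its DECL_RTL is a memory reference through that pointer, so each
   use of foo becomes an indirect load.  */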
12947 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12948 true if we require the result be a register. */
12951 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12956 gcc_assert (SYMBOL_REF_DECL (symbol));
12957 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12959 x = DECL_RTL (imp_decl);
12961 x = force_reg (Pmode, x);
12965 /* Try machine-dependent ways of modifying an illegitimate address
12966 to be legitimate. If we find one, return the new, valid address.
12967 This macro is used in only one place: `memory_address' in explow.c.
12969 OLDX is the address as it was before break_out_memory_refs was called.
12970 In some cases it is useful to look at this to decide what needs to be done.
12972 It is always safe for this macro to do nothing. It exists to recognize
12973 opportunities to optimize the output.
12975 For the 80386, we handle X+REG by loading X into a register R and
12976 using R+REG. R will go in a general reg and indexing will be used.
12977 However, if REG is a broken-out memory address or multiplication,
12978 nothing needs to be done because REG can certainly go in a general reg.
12980 When -fpic is used, special handling is needed for symbolic references.
12981 See comments by legitimize_pic_address in i386.c for details. */
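/* A sketch for illustration (editor's example): given the sum of a
   symbol and a register, e.g. (plus (symbol_ref "x") (reg)), the symbol
   is forced into a fresh register R so the address becomes
   (plus R (reg)), which the hardware can encode as base + index.  */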
12984 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12985 enum machine_mode mode)
12990 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12992 return legitimize_tls_address (x, (enum tls_model) log, false);
12993 if (GET_CODE (x) == CONST
12994 && GET_CODE (XEXP (x, 0)) == PLUS
12995 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12996 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12998 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12999 (enum tls_model) log, false);
13000 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13003 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13005 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
13006 return legitimize_dllimport_symbol (x, true);
13007 if (GET_CODE (x) == CONST
13008 && GET_CODE (XEXP (x, 0)) == PLUS
13009 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13010 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
13012 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
13013 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13017 if (flag_pic && SYMBOLIC_CONST (x))
13018 return legitimize_pic_address (x, 0);
13021 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13022 return machopic_indirect_data_reference (x, 0);
13025 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13026 if (GET_CODE (x) == ASHIFT
13027 && CONST_INT_P (XEXP (x, 1))
13028 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13031 log = INTVAL (XEXP (x, 1));
13032 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13033 GEN_INT (1 << log));
13036 if (GET_CODE (x) == PLUS)
13038 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13040 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13041 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13042 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13045 log = INTVAL (XEXP (XEXP (x, 0), 1));
13046 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13047 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13048 GEN_INT (1 << log));
13051 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13052 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13053 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13056 log = INTVAL (XEXP (XEXP (x, 1), 1));
13057 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13058 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13059 GEN_INT (1 << log));
13062 /* Put multiply first if it isn't already. */
13063 if (GET_CODE (XEXP (x, 1)) == MULT)
13065 rtx tmp = XEXP (x, 0);
13066 XEXP (x, 0) = XEXP (x, 1);
13071 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13072 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13073 created by virtual register instantiation, register elimination, and
13074 similar optimizations. */
13075 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13078 x = gen_rtx_PLUS (Pmode,
13079 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13080 XEXP (XEXP (x, 1), 0)),
13081 XEXP (XEXP (x, 1), 1));
13085    /* Canonicalize (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13086 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13087 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13088 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13089 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13090 && CONSTANT_P (XEXP (x, 1)))
13093 rtx other = NULL_RTX;
13095 if (CONST_INT_P (XEXP (x, 1)))
13097 constant = XEXP (x, 1);
13098 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13100 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13102 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13103 other = XEXP (x, 1);
13111 x = gen_rtx_PLUS (Pmode,
13112 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13113 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13114 plus_constant (other, INTVAL (constant)));
13118 if (changed && ix86_legitimate_address_p (mode, x, false))
13121 if (GET_CODE (XEXP (x, 0)) == MULT)
13124 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13127 if (GET_CODE (XEXP (x, 1)) == MULT)
13130 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13134 && REG_P (XEXP (x, 1))
13135 && REG_P (XEXP (x, 0)))
13138 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13141 x = legitimize_pic_address (x, 0);
13144 if (changed && ix86_legitimate_address_p (mode, x, false))
13147 if (REG_P (XEXP (x, 0)))
13149 rtx temp = gen_reg_rtx (Pmode);
13150 rtx val = force_operand (XEXP (x, 1), temp);
13153 if (GET_MODE (val) != Pmode)
13154 val = convert_to_mode (Pmode, val, 1);
13155 emit_move_insn (temp, val);
13158 XEXP (x, 1) = temp;
13162 else if (REG_P (XEXP (x, 1)))
13164 rtx temp = gen_reg_rtx (Pmode);
13165 rtx val = force_operand (XEXP (x, 0), temp);
13168 if (GET_MODE (val) != Pmode)
13169 val = convert_to_mode (Pmode, val, 1);
13170 emit_move_insn (temp, val);
13173 XEXP (x, 0) = temp;
13181 /* Print an integer constant expression in assembler syntax. Addition
13182 and subtraction are the only arithmetic that may appear in these
13183 expressions. FILE is the stdio stream to write to, X is the rtx, and
13184 CODE is the operand print code from the output string. */
13187 output_pic_addr_const (FILE *file, rtx x, int code)
13191 switch (GET_CODE (x))
13194 gcc_assert (flag_pic);
13199 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13200 output_addr_const (file, x);
13203 const char *name = XSTR (x, 0);
13205 /* Mark the decl as referenced so that cgraph will
13206 output the function. */
13207 if (SYMBOL_REF_DECL (x))
13208 mark_decl_referenced (SYMBOL_REF_DECL (x));
13211 if (MACHOPIC_INDIRECT
13212 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13213 name = machopic_indirection_name (x, /*stub_p=*/true);
13215 assemble_name (file, name);
13217 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13218 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13219 fputs ("@PLT", file);
13226 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13227 assemble_name (asm_out_file, buf);
13231 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13235 /* This used to output parentheses around the expression,
13236 but that does not work on the 386 (either ATT or BSD assembler). */
13237 output_pic_addr_const (file, XEXP (x, 0), code);
13241 if (GET_MODE (x) == VOIDmode)
13243 /* We can use %d if the number is <32 bits and positive. */
13244 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13245 fprintf (file, "0x%lx%08lx",
13246 (unsigned long) CONST_DOUBLE_HIGH (x),
13247 (unsigned long) CONST_DOUBLE_LOW (x));
13249 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13252 /* We can't handle floating point constants;
13253 TARGET_PRINT_OPERAND must handle them. */
13254 output_operand_lossage ("floating constant misused");
13258 /* Some assemblers need integer constants to appear first. */
13259 if (CONST_INT_P (XEXP (x, 0)))
13261 output_pic_addr_const (file, XEXP (x, 0), code);
13263 output_pic_addr_const (file, XEXP (x, 1), code);
13267 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13268 output_pic_addr_const (file, XEXP (x, 1), code);
13270 output_pic_addr_const (file, XEXP (x, 0), code);
13276 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13277 output_pic_addr_const (file, XEXP (x, 0), code);
13279 output_pic_addr_const (file, XEXP (x, 1), code);
13281 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13285 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13287 bool f = i386_asm_output_addr_const_extra (file, x);
13292 gcc_assert (XVECLEN (x, 0) == 1);
13293 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13294 switch (XINT (x, 1))
13297 fputs ("@GOT", file);
13299 case UNSPEC_GOTOFF:
13300 fputs ("@GOTOFF", file);
13302 case UNSPEC_PLTOFF:
13303 fputs ("@PLTOFF", file);
13306 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13307 "(%rip)" : "[rip]", file);
13309 case UNSPEC_GOTPCREL:
13310 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13311 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13313 case UNSPEC_GOTTPOFF:
13314 /* FIXME: This might be @TPOFF in Sun ld too. */
13315 fputs ("@gottpoff", file);
13318 fputs ("@tpoff", file);
13320 case UNSPEC_NTPOFF:
13322 fputs ("@tpoff", file);
13324 fputs ("@ntpoff", file);
13326 case UNSPEC_DTPOFF:
13327 fputs ("@dtpoff", file);
13329 case UNSPEC_GOTNTPOFF:
13331 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13332 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13334 fputs ("@gotntpoff", file);
13336 case UNSPEC_INDNTPOFF:
13337 fputs ("@indntpoff", file);
13340 case UNSPEC_MACHOPIC_OFFSET:
13342 machopic_output_function_base_name (file);
13346 output_operand_lossage ("invalid UNSPEC as operand");
13352 output_operand_lossage ("invalid expression as operand");
13356 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13357 We need to emit DTP-relative relocations. */
13359 static void ATTRIBUTE_UNUSED
13360 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13362 fputs (ASM_LONG, file);
13363 output_addr_const (file, x);
13364 fputs ("@dtpoff", file);
13370 fputs (", 0", file);
13373 gcc_unreachable ();
13377 /* Return true if X is a representation of the PIC register. This copes
13378 with calls from ix86_find_base_term, where the register might have
13379 been replaced by a cselib value. */
13382 ix86_pic_register_p (rtx x)
13384 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13385 return (pic_offset_table_rtx
13386 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13388 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13391 /* Helper function for ix86_delegitimize_address.
13392 Attempt to delegitimize TLS local-exec accesses. */
13395 ix86_delegitimize_tls_address (rtx orig_x)
13397 rtx x = orig_x, unspec;
13398 struct ix86_address addr;
13400 if (!TARGET_TLS_DIRECT_SEG_REFS)
13404 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13406 if (ix86_decompose_address (x, &addr) == 0
13407 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13408 || addr.disp == NULL_RTX
13409 || GET_CODE (addr.disp) != CONST)
13411 unspec = XEXP (addr.disp, 0);
13412 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13413 unspec = XEXP (unspec, 0);
13414 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13416 x = XVECEXP (unspec, 0, 0);
13417 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13418 if (unspec != XEXP (addr.disp, 0))
13419 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13422 rtx idx = addr.index;
13423 if (addr.scale != 1)
13424 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13425 x = gen_rtx_PLUS (Pmode, idx, x);
13428 x = gen_rtx_PLUS (Pmode, addr.base, x);
13429 if (MEM_P (orig_x))
13430 x = replace_equiv_address_nv (orig_x, x);
13434 /* In the name of slightly smaller debug output, and to cater to
13435 general assembler lossage, recognize PIC+GOTOFF and turn it back
13436 into a direct symbol reference.
13438 On Darwin, this is necessary to avoid a crash, because Darwin
13439 has a different PIC label for each routine but the DWARF debugging
13440 information is not associated with any particular routine, so it's
13441 necessary to remove references to the PIC label from RTL stored by
13442 the DWARF output code. */
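/* Editor's illustration of the concept: the PIC-legitimized form

       (plus (reg:SI ebx)
             (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   delegitimizes back to the plain (symbol_ref "foo").  */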
13445 ix86_delegitimize_address (rtx x)
13447 rtx orig_x = delegitimize_mem_from_attrs (x);
13448 /* addend is NULL or some rtx if x is something+GOTOFF where
13449 something doesn't include the PIC register. */
13450 rtx addend = NULL_RTX;
13451 /* reg_addend is NULL or a multiple of some register. */
13452 rtx reg_addend = NULL_RTX;
13453 /* const_addend is NULL or a const_int. */
13454 rtx const_addend = NULL_RTX;
13455 /* This is the result, or NULL. */
13456 rtx result = NULL_RTX;
13465 if (GET_CODE (x) == CONST
13466 && GET_CODE (XEXP (x, 0)) == PLUS
13467 && GET_MODE (XEXP (x, 0)) == Pmode
13468 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13469 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
13470 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
13472 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
13473 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
13474 if (MEM_P (orig_x))
13475 x = replace_equiv_address_nv (orig_x, x);
13478 if (GET_CODE (x) != CONST
13479 || GET_CODE (XEXP (x, 0)) != UNSPEC
13480 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13481 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13482 || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
13483 return ix86_delegitimize_tls_address (orig_x);
13484 x = XVECEXP (XEXP (x, 0), 0, 0);
13485 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
13487 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13495 if (GET_CODE (x) != PLUS
13496 || GET_CODE (XEXP (x, 1)) != CONST)
13497 return ix86_delegitimize_tls_address (orig_x);
13499 if (ix86_pic_register_p (XEXP (x, 0)))
13500 /* %ebx + GOT/GOTOFF */
13502 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13504 /* %ebx + %reg * scale + GOT/GOTOFF */
13505 reg_addend = XEXP (x, 0);
13506 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13507 reg_addend = XEXP (reg_addend, 1);
13508 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13509 reg_addend = XEXP (reg_addend, 0);
13512 reg_addend = NULL_RTX;
13513 addend = XEXP (x, 0);
13517 addend = XEXP (x, 0);
13519 x = XEXP (XEXP (x, 1), 0);
13520 if (GET_CODE (x) == PLUS
13521 && CONST_INT_P (XEXP (x, 1)))
13523 const_addend = XEXP (x, 1);
13527 if (GET_CODE (x) == UNSPEC
13528 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13529 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13530 result = XVECEXP (x, 0, 0);
13532 if (TARGET_MACHO && darwin_local_data_pic (x)
13533 && !MEM_P (orig_x))
13534 result = XVECEXP (x, 0, 0);
13537 return ix86_delegitimize_tls_address (orig_x);
13540 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13542 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13545 /* If the rest of original X doesn't involve the PIC register, add
13546 addend and subtract pic_offset_table_rtx. This can happen e.g.
13548 leal (%ebx, %ecx, 4), %ecx
13550 movl foo@GOTOFF(%ecx), %edx
13551 in which case we return (%ecx - %ebx) + foo. */
13552 if (pic_offset_table_rtx)
13553 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13554 pic_offset_table_rtx),
13559 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13561 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13562 if (result == NULL_RTX)
13568 /* If X is a machine specific address (i.e. a symbol or label being
13569 referenced as a displacement from the GOT implemented using an
13570 UNSPEC), then return the base term. Otherwise return X. */
13573 ix86_find_base_term (rtx x)
13579 if (GET_CODE (x) != CONST)
13581 term = XEXP (x, 0);
13582 if (GET_CODE (term) == PLUS
13583 && (CONST_INT_P (XEXP (term, 1))
13584 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13585 term = XEXP (term, 0);
13586 if (GET_CODE (term) != UNSPEC
13587 || (XINT (term, 1) != UNSPEC_GOTPCREL
13588 && XINT (term, 1) != UNSPEC_PCREL))
13591 return XVECEXP (term, 0, 0);
13594 return ix86_delegitimize_address (x);
13598 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13599 int fp, FILE *file)
13601 const char *suffix;
13603 if (mode == CCFPmode || mode == CCFPUmode)
13605 code = ix86_fp_compare_code_to_integer (code);
13609 code = reverse_condition (code);
13660 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13664 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13665 Those same assemblers have the same but opposite lossage on cmov. */
13666 if (mode == CCmode)
13667 suffix = fp ? "nbe" : "a";
13669 gcc_unreachable ();
13685 gcc_unreachable ();
13689 if (mode == CCmode)
13691 else if (mode == CCCmode)
13694 gcc_unreachable ();
13710 gcc_unreachable ();
13714 if (mode == CCmode)
13715 suffix = fp ? "nb" : "ae";
13716 else if (mode == CCCmode)
13719 gcc_unreachable ();
13722 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13726 if (mode == CCmode)
13729 gcc_unreachable ();
13732 suffix = fp ? "u" : "p";
13735 suffix = fp ? "nu" : "np";
13738 gcc_unreachable ();
13740 fputs (suffix, file);
13743 /* Print the name of register X to FILE based on its machine mode and number.
13744 If CODE is 'w', pretend the mode is HImode.
13745 If CODE is 'b', pretend the mode is QImode.
13746 If CODE is 'k', pretend the mode is SImode.
13747 If CODE is 'q', pretend the mode is DImode.
13748 If CODE is 'x', pretend the mode is V4SFmode.
13749 If CODE is 't', pretend the mode is V8SFmode.
13750 If CODE is 'h', pretend the reg is the 'high' byte register.
13751 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13752 If CODE is 'd', duplicate the operand for AVX instruction.
13756 print_reg (rtx x, int code, FILE *file)
13759 unsigned int regno;
13760 bool duplicated = code == 'd' && TARGET_AVX;
13762 if (ASSEMBLER_DIALECT == ASM_ATT)
13767 gcc_assert (TARGET_64BIT);
13768 fputs ("rip", file);
13772 regno = true_regnum (x);
13773 gcc_assert (regno != ARG_POINTER_REGNUM
13774 && regno != FRAME_POINTER_REGNUM
13775 && regno != FLAGS_REG
13776 && regno != FPSR_REG
13777 && regno != FPCR_REG);
13779 if (code == 'w' || MMX_REG_P (x))
13781 else if (code == 'b')
13783 else if (code == 'k')
13785 else if (code == 'q')
13787 else if (code == 'y')
13789 else if (code == 'h')
13791 else if (code == 'x')
13793 else if (code == 't')
13796 code = GET_MODE_SIZE (GET_MODE (x));
13798   /* Irritatingly, AMD extended registers use a different naming convention
13799      from the normal registers: "r%d[bwd]".  */
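  /* Editor's example: %r8 is written r8b, r8w, r8d and r8 for 1-, 2-,
     4- and 8-byte accesses, whereas the legacy registers are written
     al, ax, eax and rax.  */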
13800 if (REX_INT_REGNO_P (regno))
13802 gcc_assert (TARGET_64BIT);
13804 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
13808 error ("extended registers have no high halves");
13823 error ("unsupported operand size for extended register");
13833 if (STACK_TOP_P (x))
13842 if (! ANY_FP_REG_P (x))
13843 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13848 reg = hi_reg_name[regno];
13851 if (regno >= ARRAY_SIZE (qi_reg_name))
13853 reg = qi_reg_name[regno];
13856 if (regno >= ARRAY_SIZE (qi_high_reg_name))
13858 reg = qi_high_reg_name[regno];
13863 gcc_assert (!duplicated);
13865 fputs (hi_reg_name[regno] + 1, file);
13870 gcc_unreachable ();
13876 if (ASSEMBLER_DIALECT == ASM_ATT)
13877 fprintf (file, ", %%%s", reg);
13879 fprintf (file, ", %s", reg);
13883 /* Locate some local-dynamic symbol still in use by this function
13884    so that we can print its name in some tls_local_dynamic_base pattern.  */
13888 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13892 if (GET_CODE (x) == SYMBOL_REF
13893 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13895 cfun->machine->some_ld_name = XSTR (x, 0);
13902 static const char *
13903 get_some_local_dynamic_name (void)
13907 if (cfun->machine->some_ld_name)
13908 return cfun->machine->some_ld_name;
13910 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13911 if (NONDEBUG_INSN_P (insn)
13912 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13913 return cfun->machine->some_ld_name;
13918 /* Meaning of CODE:
13919 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13920 C -- print opcode suffix for set/cmov insn.
13921 c -- like C, but print reversed condition
13922 F,f -- likewise, but for floating-point.
13923    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
         otherwise nothing.
13925 R -- print the prefix for register names.
13926 z -- print the opcode suffix for the size of the current operand.
13927 Z -- likewise, with special suffixes for x87 instructions.
13928 * -- print a star (in certain assembler syntax)
13929 A -- print an absolute memory reference.
13930 E -- print address with DImode register names if TARGET_64BIT.
13931 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13932    s -- print a shift double count, followed by the assembler's argument
         delimiter.
13934 b -- print the QImode name of the register for the indicated operand.
13935 %b0 would print %al if operands[0] is reg 0.
13936 w -- likewise, print the HImode name of the register.
13937 k -- likewise, print the SImode name of the register.
13938 q -- likewise, print the DImode name of the register.
13939 x -- likewise, print the V4SFmode name of the register.
13940 t -- likewise, print the V8SFmode name of the register.
13941 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13942 y -- print "st(0)" instead of "st" as a register.
13943 d -- print duplicated register operand for AVX instruction.
13944 D -- print condition for SSE cmp instruction.
13945 P -- if PIC, print an @PLT suffix.
13946 p -- print raw symbol name.
13947 X -- don't print any sort of PIC '@' suffix for a symbol.
13948 & -- print some in-use local-dynamic symbol name.
13949 H -- print a memory address offset by 8; used for sse high-parts
13950 Y -- print condition for XOP pcom* instruction.
13951 + -- print a branch hint as 'cs' or 'ds' prefix
13952 ; -- print a semicolon (after prefixes due to bug in older gas).
13953 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13954 @ -- print a segment register of thread base pointer load
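/* Editor's illustration of the modifiers above (assuming operands[0]
   holds %eax in SImode): "%z0" prints the suffix "l" in AT&T syntax,
   while "%b0", "%w0" and "%k0" print %al, %ax and %eax respectively.  */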
13958 ix86_print_operand (FILE *file, rtx x, int code)
13965 if (ASSEMBLER_DIALECT == ASM_ATT)
13971 const char *name = get_some_local_dynamic_name ();
13973 output_operand_lossage ("'%%&' used without any "
13974 "local dynamic TLS references");
13976 assemble_name (file, name);
13981 switch (ASSEMBLER_DIALECT)
13988 /* Intel syntax. For absolute addresses, registers should not
13989 be surrounded by braces. */
13993 ix86_print_operand (file, x, 0);
14000 gcc_unreachable ();
14003 ix86_print_operand (file, x, 0);
14007 /* Wrap address in an UNSPEC to declare special handling. */
14009 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14011 output_address (x);
14015 if (ASSEMBLER_DIALECT == ASM_ATT)
14020 if (ASSEMBLER_DIALECT == ASM_ATT)
14025 if (ASSEMBLER_DIALECT == ASM_ATT)
14030 if (ASSEMBLER_DIALECT == ASM_ATT)
14035 if (ASSEMBLER_DIALECT == ASM_ATT)
14040 if (ASSEMBLER_DIALECT == ASM_ATT)
14045 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14047 /* Opcodes don't get size suffixes if using Intel opcodes. */
14048 if (ASSEMBLER_DIALECT == ASM_INTEL)
14051 switch (GET_MODE_SIZE (GET_MODE (x)))
14070 output_operand_lossage
14071 ("invalid operand size for operand code '%c'", code);
14076 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14078 (0, "non-integer operand used with operand code '%c'", code);
14082 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14083 if (ASSEMBLER_DIALECT == ASM_INTEL)
14086 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14088 switch (GET_MODE_SIZE (GET_MODE (x)))
14091 #ifdef HAVE_AS_IX86_FILDS
14101 #ifdef HAVE_AS_IX86_FILDQ
14104 fputs ("ll", file);
14112 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14114 /* 387 opcodes don't get size suffixes
14115 if the operands are registers. */
14116 if (STACK_REG_P (x))
14119 switch (GET_MODE_SIZE (GET_MODE (x)))
14140 output_operand_lossage
14141 ("invalid operand type used with operand code '%c'", code);
14145 output_operand_lossage
14146 ("invalid operand size for operand code '%c'", code);
14164 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14166 ix86_print_operand (file, x, 0);
14167 fputs (", ", file);
14172      /* A little bit of braindamage here.  The SSE compare instructions
14173         use completely different names for the comparisons than the
14174         fp conditional moves do.  */
14177 switch (GET_CODE (x))
14180 fputs ("eq", file);
14183 fputs ("eq_us", file);
14186 fputs ("lt", file);
14189 fputs ("nge", file);
14192 fputs ("le", file);
14195 fputs ("ngt", file);
14198 fputs ("unord", file);
14201 fputs ("neq", file);
14204 fputs ("neq_oq", file);
14207 fputs ("ge", file);
14210 fputs ("nlt", file);
14213 fputs ("gt", file);
14216 fputs ("nle", file);
14219 fputs ("ord", file);
14222 output_operand_lossage ("operand is not a condition code, "
14223 "invalid operand code 'D'");
14229 switch (GET_CODE (x))
14233 fputs ("eq", file);
14237 fputs ("lt", file);
14241 fputs ("le", file);
14244 fputs ("unord", file);
14248 fputs ("neq", file);
14252 fputs ("nlt", file);
14256 fputs ("nle", file);
14259 fputs ("ord", file);
14262 output_operand_lossage ("operand is not a condition code, "
14263 "invalid operand code 'D'");
14269 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14270 if (ASSEMBLER_DIALECT == ASM_ATT)
14272 switch (GET_MODE (x))
14274 case HImode: putc ('w', file); break;
14276 case SFmode: putc ('l', file); break;
14278 case DFmode: putc ('q', file); break;
14279 default: gcc_unreachable ();
14286 if (!COMPARISON_P (x))
14288 output_operand_lossage ("operand is neither a constant nor a "
14289 "condition code, invalid operand code "
14293 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14296 if (!COMPARISON_P (x))
14298 output_operand_lossage ("operand is neither a constant nor a "
14299 "condition code, invalid operand code "
14303 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14304 if (ASSEMBLER_DIALECT == ASM_ATT)
14307 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14310 /* Like above, but reverse condition */
14312 /* Check to see if argument to %c is really a constant
14313 and not a condition code which needs to be reversed. */
14314 if (!COMPARISON_P (x))
14316 output_operand_lossage ("operand is neither a constant nor a "
14317 "condition code, invalid operand "
14321 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14324 if (!COMPARISON_P (x))
14326 output_operand_lossage ("operand is neither a constant nor a "
14327 "condition code, invalid operand "
14331 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14332 if (ASSEMBLER_DIALECT == ASM_ATT)
14335 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14339 if (!offsettable_memref_p (x))
14341 output_operand_lossage ("operand is not an offsettable memory "
14342 "reference, invalid operand "
14346 /* It doesn't actually matter what mode we use here, as we're
14347 only going to use this for printing. */
14348 x = adjust_address_nv (x, DImode, 8);
14356 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14359 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14362 int pred_val = INTVAL (XEXP (x, 0));
14364 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14365 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14367 int taken = pred_val > REG_BR_PROB_BASE / 2;
14368 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14370	      /* Emit hints only where the default branch prediction
14371		 heuristics would fail.  */
14372 if (taken != cputaken)
14374 /* We use 3e (DS) prefix for taken branches and
14375 2e (CS) prefix for not taken branches. */
14377 fputs ("ds ; ", file);
14379 fputs ("cs ; ", file);
14387 switch (GET_CODE (x))
14390 fputs ("neq", file);
14393 fputs ("eq", file);
14397 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14401 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14405 fputs ("le", file);
14409 fputs ("lt", file);
14412 fputs ("unord", file);
14415 fputs ("ord", file);
14418 fputs ("ueq", file);
14421 fputs ("nlt", file);
14424 fputs ("nle", file);
14427 fputs ("ule", file);
14430 fputs ("ult", file);
14433 fputs ("une", file);
14436 output_operand_lossage ("operand is not a condition code, "
14437 "invalid operand code 'Y'");
14443 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14449 if (ASSEMBLER_DIALECT == ASM_ATT)
14452 /* The kernel uses a different segment register for performance
14453 reasons; a system call would not have to trash the userspace
14454 segment register, which would be expensive. */
14455 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14456 fputs ("fs", file);
14458 fputs ("gs", file);
14462 putc (TARGET_AVX2 ? 'i' : 'f', file);
14466 output_operand_lossage ("invalid operand code '%c'", code);
14471 print_reg (x, code, file);
14473 else if (MEM_P (x))
14475 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14476 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14477 && GET_MODE (x) != BLKmode)
14480 switch (GET_MODE_SIZE (GET_MODE (x)))
14482 case 1: size = "BYTE"; break;
14483 case 2: size = "WORD"; break;
14484 case 4: size = "DWORD"; break;
14485 case 8: size = "QWORD"; break;
14486 case 12: size = "TBYTE"; break;
14488 if (GET_MODE (x) == XFmode)
14493 case 32: size = "YMMWORD"; break;
14495 gcc_unreachable ();
14498 /* Check for explicit size override (codes 'b', 'w', 'k',
14502 else if (code == 'w')
14504 else if (code == 'k')
14506 else if (code == 'q')
14508 else if (code == 'x')
14511 fputs (size, file);
14512 fputs (" PTR ", file);
14516 /* Avoid (%rip) for call operands. */
14517 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14518 && !CONST_INT_P (x))
14519 output_addr_const (file, x);
14520 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14521 output_operand_lossage ("invalid constraints for operand");
14523 output_address (x);
14526 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14531 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14532 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14534 if (ASSEMBLER_DIALECT == ASM_ATT)
14536 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14538 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
14539 (unsigned long long) (int) l);
14541 fprintf (file, "0x%08x", (unsigned int) l);
14544 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14549 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14550 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14552 if (ASSEMBLER_DIALECT == ASM_ATT)
14554 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14557 /* These float cases don't actually occur as immediate operands. */
14558 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14562 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14563 fputs (dstr, file);
14568 /* We have patterns that allow zero sets of memory, for instance.
14569 In 64-bit mode, we should probably support all 8-byte vectors,
14570 since we can in fact encode that into an immediate. */
14571 if (GET_CODE (x) == CONST_VECTOR)
14573 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14577 if (code != 'P' && code != 'p')
14579 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14581 if (ASSEMBLER_DIALECT == ASM_ATT)
14584 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14585 || GET_CODE (x) == LABEL_REF)
14587 if (ASSEMBLER_DIALECT == ASM_ATT)
14590 fputs ("OFFSET FLAT:", file);
14593 if (CONST_INT_P (x))
14594 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14595 else if (flag_pic || MACHOPIC_INDIRECT)
14596 output_pic_addr_const (file, x, code);
14598 output_addr_const (file, x);
14603 ix86_print_operand_punct_valid_p (unsigned char code)
14605 return (code == '@' || code == '*' || code == '+'
14606 || code == '&' || code == ';' || code == '~');
14609 /* Print a memory operand whose address is ADDR. */
14612 ix86_print_operand_address (FILE *file, rtx addr)
14614 struct ix86_address parts;
14615 rtx base, index, disp;
14621 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14623 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14624 gcc_assert (parts.index == NULL_RTX);
14625 parts.index = XVECEXP (addr, 0, 1);
14626 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14627 addr = XVECEXP (addr, 0, 0);
14630 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
14632 gcc_assert (TARGET_64BIT);
14633 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14637 ok = ix86_decompose_address (addr, &parts);
14642 index = parts.index;
14644 scale = parts.scale;
14652 if (ASSEMBLER_DIALECT == ASM_ATT)
14654 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14657 gcc_unreachable ();
14660 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14661 if (TARGET_64BIT && !base && !index)
14665 if (GET_CODE (disp) == CONST
14666 && GET_CODE (XEXP (disp, 0)) == PLUS
14667 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14668 symbol = XEXP (XEXP (disp, 0), 0);
14670 if (GET_CODE (symbol) == LABEL_REF
14671 || (GET_CODE (symbol) == SYMBOL_REF
14672 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14675 if (!base && !index)
14677 /* Displacement only requires special attention. */
14679 if (CONST_INT_P (disp))
14681 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14682 fputs ("ds:", file);
14683 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14686 output_pic_addr_const (file, disp, 0);
14688 output_addr_const (file, disp);
14692 /* Print SImode register names to force addr32 prefix. */
14693 if (SImode_address_operand (addr, VOIDmode))
14695 #ifdef ENABLE_CHECKING
14696 gcc_assert (TARGET_64BIT);
14697 switch (GET_CODE (addr))
14700 gcc_assert (GET_MODE (addr) == SImode);
14701 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
14705 gcc_assert (GET_MODE (addr) == DImode);
14708 gcc_unreachable ();
14711 gcc_assert (!code);
14717 && CONST_INT_P (disp)
14718 && INTVAL (disp) < -16*1024*1024)
14720 /* X32 runs in 64-bit mode, where displacement, DISP, in
14721 address DISP(%r64), is encoded as 32-bit immediate sign-
14722 extended from 32-bit to 64-bit. For -0x40000300(%r64),
14723 address is %r64 + 0xffffffffbffffd00. When %r64 <
14724 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
14725 which is invalid for x32. The correct address is %r64
14726 - 0x40000300 == 0xf7ffdd64. To properly encode
14727 -0x40000300(%r64) for x32, we zero-extend negative
14728 displacement by forcing addr32 prefix which truncates
14729 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
14730 zero-extend all negative displacements, including -1(%rsp).
14731 However, for small negative displacements, sign-extension
14732 won't cause overflow. We only zero-extend negative
14733	 displacements if they are < -16*1024*1024, which is also used
14734 to check legitimate address displacements for PIC. */
14738 if (ASSEMBLER_DIALECT == ASM_ATT)
14743 output_pic_addr_const (file, disp, 0);
14744 else if (GET_CODE (disp) == LABEL_REF)
14745 output_asm_label (disp);
14747 output_addr_const (file, disp);
14752 print_reg (base, code, file);
14756 print_reg (index, vsib ? 0 : code, file);
14757 if (scale != 1 || vsib)
14758 fprintf (file, ",%d", scale);
14764 rtx offset = NULL_RTX;
14768 /* Pull out the offset of a symbol; print any symbol itself. */
14769 if (GET_CODE (disp) == CONST
14770 && GET_CODE (XEXP (disp, 0)) == PLUS
14771 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14773 offset = XEXP (XEXP (disp, 0), 1);
14774 disp = gen_rtx_CONST (VOIDmode,
14775 XEXP (XEXP (disp, 0), 0));
14779 output_pic_addr_const (file, disp, 0);
14780 else if (GET_CODE (disp) == LABEL_REF)
14781 output_asm_label (disp);
14782 else if (CONST_INT_P (disp))
14785 output_addr_const (file, disp);
14791 print_reg (base, code, file);
14794 if (INTVAL (offset) >= 0)
14796 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14800 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14807 print_reg (index, vsib ? 0 : code, file);
14808 if (scale != 1 || vsib)
14809 fprintf (file, "*%d", scale);
14816 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14819 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14823 if (GET_CODE (x) != UNSPEC)
14826 op = XVECEXP (x, 0, 0);
14827 switch (XINT (x, 1))
14829 case UNSPEC_GOTTPOFF:
14830 output_addr_const (file, op);
14831 /* FIXME: This might be @TPOFF in Sun ld. */
14832 fputs ("@gottpoff", file);
14835 output_addr_const (file, op);
14836 fputs ("@tpoff", file);
14838 case UNSPEC_NTPOFF:
14839 output_addr_const (file, op);
14841 fputs ("@tpoff", file);
14843 fputs ("@ntpoff", file);
14845 case UNSPEC_DTPOFF:
14846 output_addr_const (file, op);
14847 fputs ("@dtpoff", file);
14849 case UNSPEC_GOTNTPOFF:
14850 output_addr_const (file, op);
14852 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14853 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14855 fputs ("@gotntpoff", file);
14857 case UNSPEC_INDNTPOFF:
14858 output_addr_const (file, op);
14859 fputs ("@indntpoff", file);
14862 case UNSPEC_MACHOPIC_OFFSET:
14863 output_addr_const (file, op);
14865 machopic_output_function_base_name (file);
14869 case UNSPEC_STACK_CHECK:
14873 gcc_assert (flag_split_stack);
14875 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14876 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14878 gcc_unreachable ();
14881 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14892 /* Split one or more double-mode RTL references into pairs of half-mode
14893 references. The RTL can be REG, offsettable MEM, integer constant, or
14894 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14895 split and "num" is its length. lo_half and hi_half are output arrays
14896 that parallel "operands". */
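/* Editor's illustration: splitting the DImode value held in the ax/dx
   hard register pair on a 32-bit target yields lo (reg:SI ax) and
   hi (reg:SI dx); for an offsettable MEM, the two SImode halves sit at
   byte offsets 0 and 4 on this little-endian target.  */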
14899 split_double_mode (enum machine_mode mode, rtx operands[],
14900 int num, rtx lo_half[], rtx hi_half[])
14902 enum machine_mode half_mode;
14908 half_mode = DImode;
14911 half_mode = SImode;
14914 gcc_unreachable ();
14917 byte = GET_MODE_SIZE (half_mode);
14921 rtx op = operands[num];
14923      /* simplify_subreg refuses to split volatile memory addresses,
14924         but we still have to handle them.  */
14927 lo_half[num] = adjust_address (op, half_mode, 0);
14928 hi_half[num] = adjust_address (op, half_mode, byte);
14932 lo_half[num] = simplify_gen_subreg (half_mode, op,
14933 GET_MODE (op) == VOIDmode
14934 ? mode : GET_MODE (op), 0);
14935 hi_half[num] = simplify_gen_subreg (half_mode, op,
14936 GET_MODE (op) == VOIDmode
14937 ? mode : GET_MODE (op), byte);
14942 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14943 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14944 is the expression of the binary operation. The output may either be
14945 emitted here, or returned to the caller, like all output_* functions.
14947 There is no guarantee that the operands are the same mode, as they
14948 might be within FLOAT or FLOAT_EXTEND expressions. */
14950 #ifndef SYSV386_COMPAT
14951 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14952 wants to fix the assemblers because that causes incompatibility
14953 with gcc. No-one wants to fix gcc because that causes
14954 incompatibility with assemblers... You can use the option of
14955 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14956 #define SYSV386_COMPAT 1
14960 output_387_binary_op (rtx insn, rtx *operands)
14962 static char buf[40];
14965 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14967 #ifdef ENABLE_CHECKING
14968 /* Even if we do not want to check the inputs, this documents the input
14969 constraints, which helps in understanding the following code. */
14970 if (STACK_REG_P (operands[0])
14971 && ((REG_P (operands[1])
14972 && REGNO (operands[0]) == REGNO (operands[1])
14973 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14974 || (REG_P (operands[2])
14975 && REGNO (operands[0]) == REGNO (operands[2])
14976 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14977 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14980 gcc_assert (is_sse);
14983 switch (GET_CODE (operands[3]))
14986 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14987 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14995 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14996 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15004 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15005 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15013 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15014 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15022 gcc_unreachable ();
15029 strcpy (buf, ssep);
15030 if (GET_MODE (operands[0]) == SFmode)
15031 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15033 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15037 strcpy (buf, ssep + 1);
15038 if (GET_MODE (operands[0]) == SFmode)
15039 strcat (buf, "ss\t{%2, %0|%0, %2}");
15041 strcat (buf, "sd\t{%2, %0|%0, %2}");
15047 switch (GET_CODE (operands[3]))
15051 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15053 rtx temp = operands[2];
15054 operands[2] = operands[1];
15055 operands[1] = temp;
15058 /* We know operands[0] == operands[1]. */
15060 if (MEM_P (operands[2]))
15066 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15068 if (STACK_TOP_P (operands[0]))
15069 /* How is it that we are storing to a dead operand[2]?
15070 Well, presumably operands[1] is dead too. We can't
15071 store the result to st(0) as st(0) gets popped on this
15072 instruction. Instead store to operands[2] (which I
15073 think has to be st(1)). st(1) will be popped later.
15074 gcc <= 2.8.1 didn't have this check and generated
15075 assembly code that the Unixware assembler rejected. */
15076 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15078 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15082 if (STACK_TOP_P (operands[0]))
15083 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15085 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15090 if (MEM_P (operands[1]))
15096 if (MEM_P (operands[2]))
15102 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15105 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15106 derived assemblers, confusingly reverse the direction of
15107 the operation for fsub{r} and fdiv{r} when the
15108 destination register is not st(0). The Intel assembler
15109 doesn't have this brain damage. Read !SYSV386_COMPAT to
15110 figure out what the hardware really does. */
15111 if (STACK_TOP_P (operands[0]))
15112 p = "{p\t%0, %2|rp\t%2, %0}";
15114 p = "{rp\t%2, %0|p\t%0, %2}";
15116 if (STACK_TOP_P (operands[0]))
15117 /* As above for fmul/fadd, we can't store to st(0). */
15118 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15120 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15125 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15128 if (STACK_TOP_P (operands[0]))
15129 p = "{rp\t%0, %1|p\t%1, %0}";
15131 p = "{p\t%1, %0|rp\t%0, %1}";
15133 if (STACK_TOP_P (operands[0]))
15134 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15136 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15141 if (STACK_TOP_P (operands[0]))
15143 if (STACK_TOP_P (operands[1]))
15144 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15146 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15149 else if (STACK_TOP_P (operands[1]))
15152 p = "{\t%1, %0|r\t%0, %1}";
15154 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15160 p = "{r\t%2, %0|\t%0, %2}";
15162 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15168 gcc_unreachable ();
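/* Illustrative sketch (not part of GCC): how the dual-dialect output
   templates built above are resolved. Text inside {...|...} is split
   at '|'; output_asm_insn keeps the left part for AT&T syntax and the
   right part for Intel syntax. */
#if 0
#include <stdio.h>
#include <string.h>

static void
pick_dialect (const char *tmpl, int intel, char *out)
{
  const char *p = tmpl;
  while (*p)
    {
      if (*p == '{')
        {
          const char *bar = strchr (p, '|');
          const char *end = strchr (p, '}');
          const char *from = intel ? bar + 1 : p + 1;
          size_t n = (size_t) (intel ? end - bar : bar - p) - 1;
          memcpy (out, from, n);
          out += n;
          p = end + 1;
        }
      else
        *out++ = *p++;
    }
  *out = '\0';
}

int
main (void)
{
  char buf[64];
  pick_dialect ("fsub{rp\t%2, %0|p\t%0, %2}", 0, buf);
  puts (buf); /* AT&T:  fsubrp  %2, %0 */
  pick_dialect ("fsub{rp\t%2, %0|p\t%0, %2}", 1, buf);
  puts (buf); /* Intel: fsubp   %0, %2 */
  return 0;
}
#endif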
15175 /* Return needed mode for entity in optimize_mode_switching pass. */
15178 ix86_mode_needed (int entity, rtx insn)
15180 enum attr_i387_cw mode;
15182 /* The mode UNINITIALIZED is used to store the control word after a
15183 function call or ASM pattern. The mode ANY specifies that the function
15184 has no requirements on the control word and makes no changes in the
15185 bits we are interested in. */
15188 || (NONJUMP_INSN_P (insn)
15189 && (asm_noperands (PATTERN (insn)) >= 0
15190 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15191 return I387_CW_UNINITIALIZED;
15193 if (recog_memoized (insn) < 0)
15194 return I387_CW_ANY;
15196 mode = get_attr_i387_cw (insn);
15201 if (mode == I387_CW_TRUNC)
15206 if (mode == I387_CW_FLOOR)
15211 if (mode == I387_CW_CEIL)
15216 if (mode == I387_CW_MASK_PM)
15221 gcc_unreachable ();
15224 return I387_CW_ANY;
15227 /* Output code to initialize control word copies used by trunc?f?i and
15228 rounding patterns. CURRENT_MODE is set to the current control word,
15229 while NEW_MODE is set to the new control word. */
15232 emit_i387_cw_initialization (int mode)
15234 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15237 enum ix86_stack_slot slot;
15239 rtx reg = gen_reg_rtx (HImode);
15241 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15242 emit_move_insn (reg, copy_rtx (stored_mode));
15244 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15245 || optimize_function_for_size_p (cfun))
15249 case I387_CW_TRUNC:
15250 /* round toward zero (truncate) */
15251 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15252 slot = SLOT_CW_TRUNC;
15255 case I387_CW_FLOOR:
15256 /* round down toward -oo */
15257 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15258 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15259 slot = SLOT_CW_FLOOR;
15263 /* round up toward +oo */
15264 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15265 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15266 slot = SLOT_CW_CEIL;
15269 case I387_CW_MASK_PM:
15270 /* mask precision exception for nearbyint() */
15271 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15272 slot = SLOT_CW_MASK_PM;
15276 gcc_unreachable ();
15283 case I387_CW_TRUNC:
15284 /* round toward zero (truncate) */
15285 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15286 slot = SLOT_CW_TRUNC;
15289 case I387_CW_FLOOR:
15290 /* round down toward -oo */
15291 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15292 slot = SLOT_CW_FLOOR;
15296 /* round up toward +oo */
15297 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15298 slot = SLOT_CW_CEIL;
15301 case I387_CW_MASK_PM:
15302 /* mask precision exception for nearbyint() */
15303 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15304 slot = SLOT_CW_MASK_PM;
15308 gcc_unreachable ();
15312 gcc_assert (slot < MAX_386_STACK_LOCALS);
15314 new_mode = assign_386_stack_local (HImode, slot);
15315 emit_move_insn (new_mode, reg);
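/* Illustrative sketch (not part of GCC): bits 10-11 of the x87
   control word select the rounding mode, which is exactly what the
   masks above toggle -- OR 0x0c00 sets RC=11b (truncate), 0x0400 is
   RC=01b (round down), 0x0800 is RC=10b (round up); bit 5 (0x0020)
   masks the precision exception for nearbyint. */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned short cw = 0x037f; /* x87 power-up default, RC = nearest */
  printf ("trunc: %04x\n", (unsigned) (cw | 0x0c00));
  printf ("floor: %04x\n", (unsigned) ((cw & ~0x0c00) | 0x0400));
  printf ("ceil:  %04x\n", (unsigned) ((cw & ~0x0c00) | 0x0800));
  return 0;
}
#endif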
15318 /* Output code for INSN to convert a float to a signed int. OPERANDS
15319 are the insn operands. The output may be [HSD]Imode and the input
15320 operand may be [SDX]Fmode. */
15323 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
15325 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15326 int dimode_p = GET_MODE (operands[0]) == DImode;
15327 int round_mode = get_attr_i387_cw (insn);
15329 /* Jump through a hoop or two for DImode, since the hardware has no
15330 non-popping instruction. We used to do this a different way, but
15331 that was somewhat fragile and broke with post-reload splitters. */
15332 if ((dimode_p || fisttp) && !stack_top_dies)
15333 output_asm_insn ("fld\t%y1", operands);
15335 gcc_assert (STACK_TOP_P (operands[1]));
15336 gcc_assert (MEM_P (operands[0]));
15337 gcc_assert (GET_MODE (operands[1]) != TFmode);
15340 output_asm_insn ("fisttp%Z0\t%0", operands);
15343 if (round_mode != I387_CW_ANY)
15344 output_asm_insn ("fldcw\t%3", operands);
15345 if (stack_top_dies || dimode_p)
15346 output_asm_insn ("fistp%Z0\t%0", operands);
15348 output_asm_insn ("fist%Z0\t%0", operands);
15349 if (round_mode != I387_CW_ANY)
15350 output_asm_insn ("fldcw\t%2", operands);
15356 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15357 have the values zero or one, indicates the ffreep insn's operand
15358 from the OPERANDS array. */
15360 static const char *
15361 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15363 if (TARGET_USE_FFREEP)
15364 #ifdef HAVE_AS_IX86_FFREEP
15365 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15368 static char retval[32];
15369 int regno = REGNO (operands[opno]);
15371 gcc_assert (FP_REGNO_P (regno));
15373 regno -= FIRST_STACK_REG;
15375 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15380 return opno ? "fstp\t%y1" : "fstp\t%y0";
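/* Illustrative sketch (not part of GCC): the .word above hand-encodes
   ffreep for assemblers that do not know the mnemonic. ffreep %st(i)
   is the two bytes DF C0+i, which as a little-endian 16-bit word is
   0xc<i>df -- e.g. 0xc0df for %st(0). */
#if 0
#include <stdio.h>

int
main (void)
{
  int regno;
  for (regno = 0; regno < 8; regno++)
    printf (".word 0xc%ddf\t# ffreep %%st(%d), bytes df c%d\n",
            regno, regno, regno);
  return 0;
}
#endif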
15384 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15385 should be used. UNORDERED_P is true when fucom should be used. */
15388 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
15390 int stack_top_dies;
15391 rtx cmp_op0, cmp_op1;
15392 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15396 cmp_op0 = operands[0];
15397 cmp_op1 = operands[1];
15401 cmp_op0 = operands[1];
15402 cmp_op1 = operands[2];
15407 if (GET_MODE (operands[0]) == SFmode)
15409 return "%vucomiss\t{%1, %0|%0, %1}";
15411 return "%vcomiss\t{%1, %0|%0, %1}";
15414 return "%vucomisd\t{%1, %0|%0, %1}";
15416 return "%vcomisd\t{%1, %0|%0, %1}";
15419 gcc_assert (STACK_TOP_P (cmp_op0));
15421 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15423 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15425 if (stack_top_dies)
15427 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15428 return output_387_ffreep (operands, 1);
15431 return "ftst\n\tfnstsw\t%0";
15434 if (STACK_REG_P (cmp_op1)
15436 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15437 && REGNO (cmp_op1) != FIRST_STACK_REG)
15439 /* If the top of the 387 stack dies, and the other operand is
15440 also a stack register that dies, then this must be a
15441 `fcompp' float compare. */
15445 /* There is no double popping fcomi variant. Fortunately,
15446 eflags is immune from the fstp's cc clobbering. */
15448 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15450 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15451 return output_387_ffreep (operands, 0);
15456 return "fucompp\n\tfnstsw\t%0";
15458 return "fcompp\n\tfnstsw\t%0";
15463 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15465 static const char * const alt[16] =
15467 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15468 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15469 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15470 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15472 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15473 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15477 "fcomi\t{%y1, %0|%0, %y1}",
15478 "fcomip\t{%y1, %0|%0, %y1}",
15479 "fucomi\t{%y1, %0|%0, %y1}",
15480 "fucomip\t{%y1, %0|%0, %y1}",
15491 mask = eflags_p << 3;
15492 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15493 mask |= unordered_p << 1;
15494 mask |= stack_top_dies;
15496 gcc_assert (mask < 16);
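/* Illustrative sketch (not part of GCC): the 4-bit index into alt[]
   computed above. Bit 3 selects the fcomi (eflags) forms, bit 2 the
   integer-memory (ficom) forms, bit 1 the unordered (fucom) forms,
   and bit 0 the popping forms used when the stack top dies. */
#if 0
#include <stdio.h>

static int
fp_compare_index (int eflags_p, int intmode, int unordered_p,
                  int stack_top_dies)
{
  return (eflags_p << 3) | (intmode << 2) | (unordered_p << 1)
         | stack_top_dies;
}

int
main (void)
{
  printf ("%d\n", fp_compare_index (1, 0, 1, 1)); /* 11 -> fucomip */
  printf ("%d\n", fp_compare_index (0, 1, 0, 1)); /*  5 -> ficomp  */
  return 0;
}
#endif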
15505 ix86_output_addr_vec_elt (FILE *file, int value)
15507 const char *directive = ASM_LONG;
15511 directive = ASM_QUAD;
15513 gcc_assert (!TARGET_64BIT);
15516 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15520 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15522 const char *directive = ASM_LONG;
15525 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15526 directive = ASM_QUAD;
15528 gcc_assert (!TARGET_64BIT);
15530 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15531 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15532 fprintf (file, "%s%s%d-%s%d\n",
15533 directive, LPREFIX, value, LPREFIX, rel);
15534 else if (HAVE_AS_GOTOFF_IN_DATA)
15535 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15537 else if (TARGET_MACHO)
15539 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15540 machopic_output_function_base_name (file);
15545 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15546 GOT_SYMBOL_NAME, LPREFIX, value);
15549 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate for the target. */
15553 ix86_expand_clear (rtx dest)
15557 /* We play register width games, which are only valid after reload. */
15558 gcc_assert (reload_completed);
15560 /* Avoid HImode and its attendant prefix byte. */
15561 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15562 dest = gen_rtx_REG (SImode, REGNO (dest));
15563 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15565 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15566 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15568 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15569 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
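/* Illustrative sketch (not part of GCC): why the xor form carries a
   flags CLOBBER yet is usually preferred -- it is shorter and breaks
   dependencies, while "mov $0" leaves EFLAGS untouched. */
#if 0
#include <stdio.h>

int
main (void)
{
  static const unsigned char xor_eax[] = { 0x31, 0xc0 };        /* xor %eax,%eax */
  static const unsigned char mov0_eax[] = { 0xb8, 0, 0, 0, 0 }; /* mov $0,%eax   */
  printf ("xor: %u bytes, mov $0: %u bytes\n",
          (unsigned) sizeof xor_eax, (unsigned) sizeof mov0_eax);
  return 0;
}
#endif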
15575 /* X is an unchanging MEM. If it is a constant pool reference, return
15576 the constant pool rtx, else NULL. */
15579 maybe_get_pool_constant (rtx x)
15581 x = ix86_delegitimize_address (XEXP (x, 0));
15583 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15584 return get_pool_constant (x);
15590 ix86_expand_move (enum machine_mode mode, rtx operands[])
15593 enum tls_model model;
15598 if (GET_CODE (op1) == SYMBOL_REF)
15600 model = SYMBOL_REF_TLS_MODEL (op1);
15603 op1 = legitimize_tls_address (op1, model, true);
15604 op1 = force_operand (op1, op0);
15607 if (GET_MODE (op1) != mode)
15608 op1 = convert_to_mode (mode, op1, 1);
15610 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15611 && SYMBOL_REF_DLLIMPORT_P (op1))
15612 op1 = legitimize_dllimport_symbol (op1, false);
15614 else if (GET_CODE (op1) == CONST
15615 && GET_CODE (XEXP (op1, 0)) == PLUS
15616 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15618 rtx addend = XEXP (XEXP (op1, 0), 1);
15619 rtx symbol = XEXP (XEXP (op1, 0), 0);
15622 model = SYMBOL_REF_TLS_MODEL (symbol);
15624 tmp = legitimize_tls_address (symbol, model, true);
15625 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15626 && SYMBOL_REF_DLLIMPORT_P (symbol))
15627 tmp = legitimize_dllimport_symbol (symbol, true);
15631 tmp = force_operand (tmp, NULL);
15632 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15633 op0, 1, OPTAB_DIRECT);
15636 op1 = convert_to_mode (mode, tmp, 1);
15640 if ((flag_pic || MACHOPIC_INDIRECT)
15641 && symbolic_operand (op1, mode))
15643 if (TARGET_MACHO && !TARGET_64BIT)
15646 /* dynamic-no-pic */
15647 if (MACHOPIC_INDIRECT)
15649 rtx temp = ((reload_in_progress
15650 || ((op0 && REG_P (op0))
15652 ? op0 : gen_reg_rtx (Pmode));
15653 op1 = machopic_indirect_data_reference (op1, temp);
15655 op1 = machopic_legitimize_pic_address (op1, mode,
15656 temp == op1 ? 0 : temp);
15658 if (op0 != op1 && GET_CODE (op0) != MEM)
15660 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15664 if (GET_CODE (op0) == MEM)
15665 op1 = force_reg (Pmode, op1);
15669 if (GET_CODE (temp) != REG)
15670 temp = gen_reg_rtx (Pmode);
15671 temp = legitimize_pic_address (op1, temp);
15676 /* dynamic-no-pic */
15682 op1 = force_reg (mode, op1);
15683 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15685 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15686 op1 = legitimize_pic_address (op1, reg);
15689 if (GET_MODE (op1) != mode)
15690 op1 = convert_to_mode (mode, op1, 1);
15697 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15698 || !push_operand (op0, mode))
15700 op1 = force_reg (mode, op1);
15702 if (push_operand (op0, mode)
15703 && ! general_no_elim_operand (op1, mode))
15704 op1 = copy_to_mode_reg (mode, op1);
15706 /* Force large constants in 64bit compilation into a register
15707 to get them CSEed. */
15708 if (can_create_pseudo_p ()
15709 && (mode == DImode) && TARGET_64BIT
15710 && immediate_operand (op1, mode)
15711 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15712 && !register_operand (op0, mode)
15714 op1 = copy_to_mode_reg (mode, op1);
15716 if (can_create_pseudo_p ()
15717 && FLOAT_MODE_P (mode)
15718 && GET_CODE (op1) == CONST_DOUBLE)
15720 /* If we are loading a floating point constant to a register,
15721 force the value to memory now, since we'll get better code
15722 out of the back end. */
15724 op1 = validize_mem (force_const_mem (mode, op1));
15725 if (!register_operand (op0, mode))
15727 rtx temp = gen_reg_rtx (mode);
15728 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15729 emit_move_insn (op0, temp);
15735 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15739 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15741 rtx op0 = operands[0], op1 = operands[1];
15742 unsigned int align = GET_MODE_ALIGNMENT (mode);
15744 /* Force constants other than zero into memory. We do not know how
15745 the instructions used to build constants modify the upper 64 bits
15746 of the register; once we have that information we may be able
15747 to handle some of them more efficiently. */
15748 if (can_create_pseudo_p ()
15749 && register_operand (op0, mode)
15750 && (CONSTANT_P (op1)
15751 || (GET_CODE (op1) == SUBREG
15752 && CONSTANT_P (SUBREG_REG (op1))))
15753 && !standard_sse_constant_p (op1))
15754 op1 = validize_mem (force_const_mem (mode, op1));
15756 /* We need to check memory alignment for SSE mode since attributes
15757 can make operands unaligned. */
15758 if (can_create_pseudo_p ()
15759 && SSE_REG_MODE_P (mode)
15760 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15761 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15765 /* ix86_expand_vector_move_misalign() does not like constants ... */
15766 if (CONSTANT_P (op1)
15767 || (GET_CODE (op1) == SUBREG
15768 && CONSTANT_P (SUBREG_REG (op1))))
15769 op1 = validize_mem (force_const_mem (mode, op1));
15771 /* ... nor both arguments in memory. */
15772 if (!register_operand (op0, mode)
15773 && !register_operand (op1, mode))
15774 op1 = force_reg (mode, op1);
15776 tmp[0] = op0; tmp[1] = op1;
15777 ix86_expand_vector_move_misalign (mode, tmp);
15781 /* Make operand1 a register if it isn't already. */
15782 if (can_create_pseudo_p ()
15783 && !register_operand (op0, mode)
15784 && !register_operand (op1, mode))
15786 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15790 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15793 /* Split 32-byte AVX unaligned load and store if needed. */
15796 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15799 rtx (*extract) (rtx, rtx, rtx);
15800 rtx (*load_unaligned) (rtx, rtx);
15801 rtx (*store_unaligned) (rtx, rtx);
15802 enum machine_mode mode;
15804 switch (GET_MODE (op0))
15807 gcc_unreachable ();
15809 extract = gen_avx_vextractf128v32qi;
15810 load_unaligned = gen_avx_loaddqu256;
15811 store_unaligned = gen_avx_storedqu256;
15815 extract = gen_avx_vextractf128v8sf;
15816 load_unaligned = gen_avx_loadups256;
15817 store_unaligned = gen_avx_storeups256;
15821 extract = gen_avx_vextractf128v4df;
15822 load_unaligned = gen_avx_loadupd256;
15823 store_unaligned = gen_avx_storeupd256;
15830 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15832 rtx r = gen_reg_rtx (mode);
15833 m = adjust_address (op1, mode, 0);
15834 emit_move_insn (r, m);
15835 m = adjust_address (op1, mode, 16);
15836 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15837 emit_move_insn (op0, r);
15840 emit_insn (load_unaligned (op0, op1));
15842 else if (MEM_P (op0))
15844 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15846 m = adjust_address (op0, mode, 0);
15847 emit_insn (extract (m, op1, const0_rtx));
15848 m = adjust_address (op0, mode, 16);
15849 emit_insn (extract (m, op1, const1_rtx));
15852 emit_insn (store_unaligned (op0, op1));
15855 gcc_unreachable ();
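/* Illustrative sketch (not part of GCC): what the split above does,
   in scalar terms. One unaligned 32-byte access becomes two
   independent 16-byte halves at offsets 0 and 16 (vextractf128 for
   stores, a 16-byte load plus VEC_CONCAT for loads in the real
   code), which is faster on some AVX implementations. */
#if 0
#include <string.h>

static void
split_store_32 (unsigned char *mem, const unsigned char reg[32])
{
  memcpy (mem, reg, 16);           /* vextractf128 $0, reg -> 0(mem)  */
  memcpy (mem + 16, reg + 16, 16); /* vextractf128 $1, reg -> 16(mem) */
}

int
main (void)
{
  unsigned char reg[32] = { 42 }, mem[32];
  split_store_32 (mem, reg);
  return mem[0] == 42 ? 0 : 1;
}
#endif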
15858 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15859 straight to ix86_expand_vector_move. */
15860 /* Code generation for scalar reg-reg moves of single and double precision data:
15861 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15862 movaps reg, reg
15863 else
15864 movss reg, reg
15865 if (x86_sse_partial_reg_dependency == true)
15866 movapd reg, reg
15867 else
15868 movsd reg, reg

15870 Code generation for scalar loads of double precision data:
15871 if (x86_sse_split_regs == true)
15872 movlpd mem, reg (gas syntax)
15873 else
15874 movsd mem, reg

15876 Code generation for unaligned packed loads of single precision data
15877 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15878 if (x86_sse_unaligned_move_optimal)
15879 movups mem, reg

15881 if (x86_sse_partial_reg_dependency == true)
15882 {
15883 xorps reg, reg
15884 movlps mem, reg
15885 movhps mem+8, reg
15886 }
15887 else
15888 {
15889 movlps mem, reg
15890 movhps mem+8, reg
15891 }

15893 Code generation for unaligned packed loads of double precision data
15894 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15895 if (x86_sse_unaligned_move_optimal)
15896 movupd mem, reg

15898 if (x86_sse_split_regs == true)
15899 {
15900 movlpd mem, reg
15901 movhpd mem+8, reg
15902 }
15903 else
15904 {
15905 movsd mem, reg
15906 movhpd mem+8, reg
15907 }
15908 */
15910 void
15911 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15914 rtx (*move_unaligned) (rtx, rtx);
15921 switch (GET_MODE_CLASS (mode))
15923 case MODE_VECTOR_INT:
15925 switch (GET_MODE_SIZE (mode))
15928 /* If we're optimizing for size, movups is the smallest. */
15929 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15932 move_unaligned = gen_sse_loadups;
15933 else if (MEM_P (op0))
15934 move_unaligned = gen_sse_storeups;
15936 gcc_unreachable ();
15938 op0 = gen_lowpart (V4SFmode, op0);
15939 op1 = gen_lowpart (V4SFmode, op1);
15940 emit_insn (move_unaligned (op0, op1));
15944 move_unaligned = gen_sse2_loaddqu;
15945 else if (MEM_P (op0))
15946 move_unaligned = gen_sse2_storedqu;
15948 gcc_unreachable ();
15950 op0 = gen_lowpart (V16QImode, op0);
15951 op1 = gen_lowpart (V16QImode, op1);
15952 emit_insn (move_unaligned (op0, op1));
15955 op0 = gen_lowpart (V32QImode, op0);
15956 op1 = gen_lowpart (V32QImode, op1);
15957 ix86_avx256_split_vector_move_misalign (op0, op1);
15960 gcc_unreachable ();
15963 case MODE_VECTOR_FLOAT:
15964 op0 = gen_lowpart (mode, op0);
15965 op1 = gen_lowpart (mode, op1);
15971 move_unaligned = gen_sse_loadups;
15972 else if (MEM_P (op0))
15973 move_unaligned = gen_sse_storeups;
15975 gcc_unreachable ();
15977 emit_insn (move_unaligned (op0, op1));
15980 ix86_avx256_split_vector_move_misalign (op0, op1);
15983 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15986 move_unaligned = gen_sse_loadups;
15987 else if (MEM_P (op0))
15988 move_unaligned = gen_sse_storeups;
15990 gcc_unreachable ();
15992 op0 = gen_lowpart (V4SFmode, op0);
15993 op1 = gen_lowpart (V4SFmode, op1);
15994 emit_insn (move_unaligned (op0, op1));
15998 move_unaligned = gen_sse2_loadupd;
15999 else if (MEM_P (op0))
16000 move_unaligned = gen_sse2_storeupd;
16002 gcc_unreachable ();
16004 emit_insn (move_unaligned (op0, op1));
16007 ix86_avx256_split_vector_move_misalign (op0, op1);
16010 gcc_unreachable ();
16015 gcc_unreachable ();
16023 /* If we're optimizing for size, movups is the smallest. */
16024 if (optimize_insn_for_size_p ()
16025 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
16027 op0 = gen_lowpart (V4SFmode, op0);
16028 op1 = gen_lowpart (V4SFmode, op1);
16029 emit_insn (gen_sse_loadups (op0, op1));
16033 /* ??? If we have typed data, then it would appear that using
16034 movdqu is the only way to get unaligned data loaded with
16035 integer type instructions. */
16036 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16038 op0 = gen_lowpart (V16QImode, op0);
16039 op1 = gen_lowpart (V16QImode, op1);
16040 emit_insn (gen_sse2_loaddqu (op0, op1));
16044 if (TARGET_SSE2 && mode == V2DFmode)
16048 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
16050 op0 = gen_lowpart (V2DFmode, op0);
16051 op1 = gen_lowpart (V2DFmode, op1);
16052 emit_insn (gen_sse2_loadupd (op0, op1));
16056 /* When SSE registers are split into halves, we can avoid
16057 writing to the top half twice. */
16058 if (TARGET_SSE_SPLIT_REGS)
16060 emit_clobber (op0);
16065 /* ??? Not sure about the best option for the Intel chips.
16066 The following would seem to satisfy; the register is
16067 entirely cleared, breaking the dependency chain. We
16068 then store to the upper half, with a dependency depth
16069 of one. A rumor has it that Intel recommends two movsd
16070 followed by an unpacklpd, but this is unconfirmed. And
16071 given that the dependency depth of the unpacklpd would
16072 still be one, I'm not sure why this would be better. */
16073 zero = CONST0_RTX (V2DFmode);
16076 m = adjust_address (op1, DFmode, 0);
16077 emit_insn (gen_sse2_loadlpd (op0, zero, m));
16078 m = adjust_address (op1, DFmode, 8);
16079 emit_insn (gen_sse2_loadhpd (op0, op0, m));
16083 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
16085 op0 = gen_lowpart (V4SFmode, op0);
16086 op1 = gen_lowpart (V4SFmode, op1);
16087 emit_insn (gen_sse_loadups (op0, op1));
16091 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
16092 emit_move_insn (op0, CONST0_RTX (mode));
16094 emit_clobber (op0);
16096 if (mode != V4SFmode)
16097 op0 = gen_lowpart (V4SFmode, op0);
16098 m = adjust_address (op1, V2SFmode, 0);
16099 emit_insn (gen_sse_loadlps (op0, op0, m));
16100 m = adjust_address (op1, V2SFmode, 8);
16101 emit_insn (gen_sse_loadhps (op0, op0, m));
16104 else if (MEM_P (op0))
16106 /* If we're optimizing for size, movups is the smallest. */
16107 if (optimize_insn_for_size_p ()
16108 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
16110 op0 = gen_lowpart (V4SFmode, op0);
16111 op1 = gen_lowpart (V4SFmode, op1);
16112 emit_insn (gen_sse_storeups (op0, op1));
16116 /* ??? Similar to above, only less clear because of quote
16117 typeless stores unquote. */
16118 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
16119 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16121 op0 = gen_lowpart (V16QImode, op0);
16122 op1 = gen_lowpart (V16QImode, op1);
16123 emit_insn (gen_sse2_storedqu (op0, op1));
16127 if (TARGET_SSE2 && mode == V2DFmode)
16129 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
16131 op0 = gen_lowpart (V2DFmode, op0);
16132 op1 = gen_lowpart (V2DFmode, op1);
16133 emit_insn (gen_sse2_storeupd (op0, op1));
16137 m = adjust_address (op0, DFmode, 0);
16138 emit_insn (gen_sse2_storelpd (m, op1));
16139 m = adjust_address (op0, DFmode, 8);
16140 emit_insn (gen_sse2_storehpd (m, op1));
16145 if (mode != V4SFmode)
16146 op1 = gen_lowpart (V4SFmode, op1);
16148 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
16150 op0 = gen_lowpart (V4SFmode, op0);
16151 emit_insn (gen_sse_storeups (op0, op1));
16155 m = adjust_address (op0, V2SFmode, 0);
16156 emit_insn (gen_sse_storelps (m, op1));
16157 m = adjust_address (op0, V2SFmode, 8);
16158 emit_insn (gen_sse_storehps (m, op1));
16163 gcc_unreachable ();
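/* Illustrative sketch (not part of GCC): the movlps/movhps fallback
   paths above, in scalar terms. Instead of one unaligned 16-byte
   access, the vector register is assembled from two 8-byte halves,
   optionally after zeroing the register first (xorps) to break the
   partial-register dependency, as done under
   TARGET_SSE_PARTIAL_REG_DEPENDENCY. */
#if 0
#include <string.h>

static void
load_unaligned_16 (unsigned char reg[16], const unsigned char *mem)
{
  memset (reg, 0, 16);          /* xorps reg, reg: break dependency  */
  memcpy (reg, mem, 8);         /* movlps (mem), reg : low 8 bytes   */
  memcpy (reg + 8, mem + 8, 8); /* movhps 8(mem), reg : high 8 bytes */
}

int
main (void)
{
  unsigned char mem[16] = { 1, 2, 3 }, reg[16];
  load_unaligned_16 (reg, mem);
  return reg[0] == 1 ? 0 : 1;
}
#endif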
16166 /* Expand a push in MODE. This is some mode for which we do not support
16167 proper push instructions, at least from the registers that we expect
16168 the value to live in. */
16171 ix86_expand_push (enum machine_mode mode, rtx x)
16175 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
16176 GEN_INT (-GET_MODE_SIZE (mode)),
16177 stack_pointer_rtx, 1, OPTAB_DIRECT);
16178 if (tmp != stack_pointer_rtx)
16179 emit_move_insn (stack_pointer_rtx, tmp);
16181 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
16183 /* When we push an operand onto the stack, it has to be aligned at least
16184 at the function argument boundary. However since we don't have
16185 the argument type, we can't determine the actual argument
16186 boundary. */
16187 emit_move_insn (tmp, x);
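/* Illustrative sketch (not part of GCC): the expansion above in plain
   C terms -- a push of a mode with no native push instruction becomes
   an explicit stack-pointer adjustment followed by a store through
   the new stack pointer. */
#if 0
#include <stdio.h>
#include <string.h>

static unsigned char stack_mem[64];
static unsigned char *sp = stack_mem + sizeof stack_mem;

static void
push_value (const void *val, size_t size)
{
  sp -= size;             /* the PLUS of -GET_MODE_SIZE above */
  memcpy (sp, val, size); /* the store through the new sp     */
}

int
main (void)
{
  double d = 2.5;
  push_value (&d, sizeof d);
  printf ("%f\n", *(double *) (void *) sp);
  return 0;
}
#endif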
16190 /* Helper function of ix86_fixup_binary_operands to canonicalize
16191 operand order. Returns true if the operands should be swapped. */
16194 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
16197 rtx dst = operands[0];
16198 rtx src1 = operands[1];
16199 rtx src2 = operands[2];
16201 /* If the operation is not commutative, we can't do anything. */
16202 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
16205 /* Highest priority is that src1 should match dst. */
16206 if (rtx_equal_p (dst, src1))
16208 if (rtx_equal_p (dst, src2))
16211 /* Next highest priority is that immediate constants come second. */
16212 if (immediate_operand (src2, mode))
16214 if (immediate_operand (src1, mode))
16217 /* Lowest priority is that memory references should come second. */
16227 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16228 destination to use for the operation. If different from the true
16229 destination in operands[0], a copy operation will be required. */
16232 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
16235 rtx dst = operands[0];
16236 rtx src1 = operands[1];
16237 rtx src2 = operands[2];
16239 /* Canonicalize operand order. */
16240 if (ix86_swap_binary_operands_p (code, mode, operands))
16244 /* It is invalid to swap operands of different modes. */
16245 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
16252 /* Both source operands cannot be in memory. */
16253 if (MEM_P (src1) && MEM_P (src2))
16255 /* Optimization: Only read from memory once. */
16256 if (rtx_equal_p (src1, src2))
16258 src2 = force_reg (mode, src2);
16262 src2 = force_reg (mode, src2);
16265 /* If the destination is memory, and we do not have matching source
16266 operands, do things in registers. */
16267 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16268 dst = gen_reg_rtx (mode);
16270 /* Source 1 cannot be a constant. */
16271 if (CONSTANT_P (src1))
16272 src1 = force_reg (mode, src1);
16274 /* Source 1 cannot be a non-matching memory. */
16275 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16276 src1 = force_reg (mode, src1);
16278 /* Improve address combine. */
16280 && GET_MODE_CLASS (mode) == MODE_INT
16282 src2 = force_reg (mode, src2);
16284 operands[1] = src1;
16285 operands[2] = src2;
16289 /* Similarly, but assume that the destination has already been
16290 set up properly. */
16293 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
16294 enum machine_mode mode, rtx operands[])
16296 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
16297 gcc_assert (dst == operands[0]);
16300 /* Attempt to expand a binary operator. Make the expansion closer to the
16301 actual machine than just general_operand, which would allow 3 separate
16302 memory references (one output, two inputs) in a single insn. */
16305 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
16308 rtx src1, src2, dst, op, clob;
16310 dst = ix86_fixup_binary_operands (code, mode, operands);
16311 src1 = operands[1];
16312 src2 = operands[2];
16314 /* Emit the instruction. */
16316 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
16317 if (reload_in_progress)
16319 /* Reload doesn't know about the flags register, and doesn't know that
16320 it doesn't want to clobber it. We can only do this with PLUS. */
16321 gcc_assert (code == PLUS);
16324 else if (reload_completed
16326 && !rtx_equal_p (dst, src1))
16328 /* This is going to be an LEA; avoid splitting it later. */
16333 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16334 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16337 /* Fix up the destination if needed. */
16338 if (dst != operands[0])
16339 emit_move_insn (operands[0], dst);
16342 /* Return TRUE or FALSE depending on whether the binary operator meets the
16343 appropriate constraints. */
16346 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16349 rtx dst = operands[0];
16350 rtx src1 = operands[1];
16351 rtx src2 = operands[2];
16353 /* Both source operands cannot be in memory. */
16354 if (MEM_P (src1) && MEM_P (src2))
16357 /* Canonicalize operand order for commutative operators. */
16358 if (ix86_swap_binary_operands_p (code, mode, operands))
16365 /* If the destination is memory, we must have a matching source operand. */
16366 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16369 /* Source 1 cannot be a constant. */
16370 if (CONSTANT_P (src1))
16373 /* Source 1 cannot be a non-matching memory. */
16374 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16375 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16376 return (code == AND
16379 || (TARGET_64BIT && mode == DImode))
16380 && satisfies_constraint_L (src2));
16385 /* Attempt to expand a unary operator. Make the expansion closer to the
16386 actual machine than just general_operand, which would allow 2 separate
16387 memory references (one output, one input) in a single insn. */
16390 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16393 int matching_memory;
16394 rtx src, dst, op, clob;
16399 /* If the destination is memory, and we do not have matching source
16400 operands, do things in registers. */
16401 matching_memory = 0;
16404 if (rtx_equal_p (dst, src))
16405 matching_memory = 1;
16407 dst = gen_reg_rtx (mode);
16410 /* When source operand is memory, destination must match. */
16411 if (MEM_P (src) && !matching_memory)
16412 src = force_reg (mode, src);
16414 /* Emit the instruction. */
16416 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16417 if (reload_in_progress || code == NOT)
16419 /* Reload doesn't know about the flags register, and doesn't know that
16420 it doesn't want to clobber it. */
16421 gcc_assert (code == NOT);
16426 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16427 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16430 /* Fix up the destination if needed. */
16431 if (dst != operands[0])
16432 emit_move_insn (operands[0], dst);
16435 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16436 divisor are within the range [0-255]. */
16439 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16442 rtx end_label, qimode_label;
16443 rtx insn, div, mod;
16444 rtx scratch, tmp0, tmp1, tmp2;
16445 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16446 rtx (*gen_zero_extend) (rtx, rtx);
16447 rtx (*gen_test_ccno_1) (rtx, rtx);
16452 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16453 gen_test_ccno_1 = gen_testsi_ccno_1;
16454 gen_zero_extend = gen_zero_extendqisi2;
16457 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16458 gen_test_ccno_1 = gen_testdi_ccno_1;
16459 gen_zero_extend = gen_zero_extendqidi2;
16462 gcc_unreachable ();
16465 end_label = gen_label_rtx ();
16466 qimode_label = gen_label_rtx ();
16468 scratch = gen_reg_rtx (mode);
16470 /* Use 8bit unsigned divmod if dividend and divisor are within
16471 the range [0-255]. */
16472 emit_move_insn (scratch, operands[2]);
16473 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16474 scratch, 1, OPTAB_DIRECT);
16475 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16476 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16477 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16478 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16479 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16481 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16482 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16483 JUMP_LABEL (insn) = qimode_label;
16485 /* Generate original signed/unsigned divmod. */
16486 div = gen_divmod4_1 (operands[0], operands[1],
16487 operands[2], operands[3]);
16490 /* Branch to the end. */
16491 emit_jump_insn (gen_jump (end_label));
16494 /* Generate 8bit unsigned divide. */
16495 emit_label (qimode_label);
16496 /* Don't use operands[0] for result of 8bit divide since not all
16497 registers support QImode ZERO_EXTRACT. */
16498 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16499 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16500 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16501 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16505 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16506 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16510 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16511 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16514 /* Extract remainder from AH. */
16515 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16516 if (REG_P (operands[1]))
16517 insn = emit_move_insn (operands[1], tmp1);
16520 /* Need a new scratch register since the old one has the result of the 8bit divide. */
16522 scratch = gen_reg_rtx (mode);
16523 emit_move_insn (scratch, tmp1);
16524 insn = emit_move_insn (operands[1], scratch);
16526 set_unique_reg_note (insn, REG_EQUAL, mod);
16528 /* Zero extend quotient from AL. */
16529 tmp1 = gen_lowpart (QImode, tmp0);
16530 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16531 set_unique_reg_note (insn, REG_EQUAL, div);
16533 emit_label (end_label);
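/* Illustrative sketch (not part of GCC): the range test emitted
   above. If (dividend | divisor) has no bits above the low 8, both
   values are in [0, 255], so the single-byte DIV (quotient in AL,
   remainder in AH) can replace the much slower full-width divide. */
#if 0
#include <stdio.h>

static void
divmod_u32 (unsigned int a, unsigned int b,
            unsigned int *q, unsigned int *r)
{
  if (((a | b) & ~0xffu) == 0)
    {
      /* 8-bit path: "div b" with AX as dividend; AL = quotient,
         AH = remainder, as extracted by the ZERO_EXTRACT above. */
      unsigned short ax = (unsigned short) a;
      *q = ax / (unsigned char) b;
      *r = ax % (unsigned char) b;
    }
  else
    {
      *q = a / b;
      *r = a % b;
    }
}

int
main (void)
{
  unsigned int q, r;
  divmod_u32 (200, 7, &q, &r);
  printf ("q=%u r=%u\n", q, r); /* q=28 r=4 */
  return 0;
}
#endif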
16536 #define LEA_MAX_STALL (3)
16537 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16539 /* Increase given DISTANCE in half-cycles according to
16540 dependencies between PREV and NEXT instructions.
16541 Add 1 half-cycle if there is no dependency and
16542 go to the next cycle if there is some dependency. */
16544 static unsigned int
16545 increase_distance (rtx prev, rtx next, unsigned int distance)
16550 if (!prev || !next)
16551 return distance + (distance & 1) + 2;
16553 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
16554 return distance + 1;
16556 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16557 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16558 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
16559 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
16560 return distance + (distance & 1) + 2;
16562 return distance + 1;
16565 /* Function checks if instruction INSN defines register number
16566 REGNO1 or REGNO2. */
16569 insn_defines_reg (unsigned int regno1, unsigned int regno2,
16574 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
16575 if (DF_REF_REG_DEF_P (*def_rec)
16576 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16577 && (regno1 == DF_REF_REGNO (*def_rec)
16578 || regno2 == DF_REF_REGNO (*def_rec)))
16586 /* Function checks if instruction INSN uses register number
16587 REGNO as a part of an address expression. */
16590 insn_uses_reg_mem (unsigned int regno, rtx insn)
16594 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
16595 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
16601 /* Search backward for non-agu definition of register number REGNO1
16602 or register number REGNO2 in basic block starting from instruction
16603 START up to head of basic block or instruction INSN.
16605 Function puts true value into *FOUND var if definition was found
16606 and false otherwise.
16608 Distance in half-cycles between START and found instruction or head
16609 of BB is added to DISTANCE and returned. */
16612 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
16613 rtx insn, int distance,
16614 rtx start, bool *found)
16616 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16624 && distance < LEA_SEARCH_THRESHOLD)
16626 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
16628 distance = increase_distance (prev, next, distance);
16629 if (insn_defines_reg (regno1, regno2, prev))
16631 if (recog_memoized (prev) < 0
16632 || get_attr_type (prev) != TYPE_LEA)
16641 if (prev == BB_HEAD (bb))
16644 prev = PREV_INSN (prev);
16650 /* Search backward for non-agu definition of register number REGNO1
16651 or register number REGNO2 in INSN's basic block until
16652 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16653 2. Reach the boundary of a neighbouring BB, or
16654 3. Reach agu definition.
16655 Returns the distance between the non-agu definition point and INSN.
16656 If no definition point, returns -1. */
16659 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16662 basic_block bb = BLOCK_FOR_INSN (insn);
16664 bool found = false;
16666 if (insn != BB_HEAD (bb))
16667 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16668 distance, PREV_INSN (insn),
16671 if (!found && distance < LEA_SEARCH_THRESHOLD)
16675 bool simple_loop = false;
16677 FOR_EACH_EDGE (e, ei, bb->preds)
16680 simple_loop = true;
16685 distance = distance_non_agu_define_in_bb (regno1, regno2,
16687 BB_END (bb), &found);
16690 int shortest_dist = -1;
16691 bool found_in_bb = false;
16693 FOR_EACH_EDGE (e, ei, bb->preds)
16696 = distance_non_agu_define_in_bb (regno1, regno2,
16702 if (shortest_dist < 0)
16703 shortest_dist = bb_dist;
16704 else if (bb_dist > 0)
16705 shortest_dist = MIN (bb_dist, shortest_dist);
16711 distance = shortest_dist;
16715 /* get_attr_type may modify recog data. We want to make sure
16716 that recog data is valid for instruction INSN, on which
16717 distance_non_agu_define is called. INSN is unchanged here. */
16718 extract_insn_cached (insn);
16723 return distance >> 1;
16726 /* Return the distance in half-cycles between INSN and the next
16727 insn that uses register number REGNO in memory address added
16728 to DISTANCE. Return -1 if REGNO is set.
16730 Put true value into *FOUND if register usage was found and
16731 false otherwise.
16732 Put true value into *REDEFINED if register redefinition was
16733 found and false otherwise. */
16736 distance_agu_use_in_bb (unsigned int regno,
16737 rtx insn, int distance, rtx start,
16738 bool *found, bool *redefined)
16740 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16745 *redefined = false;
16749 && distance < LEA_SEARCH_THRESHOLD)
16751 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
16753 distance = increase_distance (prev, next, distance);
16754 if (insn_uses_reg_mem (regno, next))
16756 /* Return DISTANCE if OP0 is used in memory
16757 address in NEXT. */
16762 if (insn_defines_reg (regno, INVALID_REGNUM, next))
16764 /* Return -1 if OP0 is set in NEXT. */
16772 if (next == BB_END (bb))
16775 next = NEXT_INSN (next);
16781 /* Return the distance between INSN and the next insn that uses
16782 register number REGNO0 in memory address. Return -1 if no such
16783 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16786 distance_agu_use (unsigned int regno0, rtx insn)
16788 basic_block bb = BLOCK_FOR_INSN (insn);
16790 bool found = false;
16791 bool redefined = false;
16793 if (insn != BB_END (bb))
16794 distance = distance_agu_use_in_bb (regno0, insn, distance,
16796 &found, &redefined);
16798 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16802 bool simple_loop = false;
16804 FOR_EACH_EDGE (e, ei, bb->succs)
16807 simple_loop = true;
16812 distance = distance_agu_use_in_bb (regno0, insn,
16813 distance, BB_HEAD (bb),
16814 &found, &redefined);
16817 int shortest_dist = -1;
16818 bool found_in_bb = false;
16819 bool redefined_in_bb = false;
16821 FOR_EACH_EDGE (e, ei, bb->succs)
16824 = distance_agu_use_in_bb (regno0, insn,
16825 distance, BB_HEAD (e->dest),
16826 &found_in_bb, &redefined_in_bb);
16829 if (shortest_dist < 0)
16830 shortest_dist = bb_dist;
16831 else if (bb_dist > 0)
16832 shortest_dist = MIN (bb_dist, shortest_dist);
16838 distance = shortest_dist;
16842 if (!found || redefined)
16845 return distance >> 1;
16848 /* Define this macro to tune LEA priority vs ADD; it takes effect when
16849 there is a dilemma of choosing LEA or ADD.
16850 Negative value: ADD is preferred over LEA.
16851 Zero: neutral.
16852 Positive value: LEA is preferred over ADD. */
16853 #define IX86_LEA_PRIORITY 0
16855 /* Return true if usage of lea INSN has a performance advantage
16856 over a sequence of instructions. The instruction sequence has
16857 SPLIT_COST cycles higher latency than the lea latency. */
16860 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
16861 unsigned int regno2, int split_cost)
16863 int dist_define, dist_use;
16865 dist_define = distance_non_agu_define (regno1, regno2, insn);
16866 dist_use = distance_agu_use (regno0, insn);
16868 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
16870 /* If there is no non-AGU operand definition, no AGU
16871 operand usage and split cost is 0 then both lea
16872 and non lea variants have the same priority. Currently
16873 we prefer lea for 64 bit code and non lea on 32 bit
16874 code. */
16875 if (dist_use < 0 && split_cost == 0)
16876 return TARGET_64BIT || IX86_LEA_PRIORITY;
16881 /* With a longer definition distance, lea is more preferable.
16882 Here we change it to take into account splitting cost and
16883 lea priority. */
16884 dist_define += split_cost + IX86_LEA_PRIORITY;
16886 /* If there is no use in a memory address then we just check
16887 that split cost does not exceed the AGU stall. */
16889 return dist_define >= LEA_MAX_STALL;
16891 /* If this insn has both backward non-agu dependence and forward
16892 agu dependence, the one with the shorter distance takes effect. */
16893 return dist_define >= dist_use;
16896 /* Return true if it is legal to clobber flags by INSN and
16897 false otherwise. */
16900 ix86_ok_to_clobber_flags (rtx insn)
16902 basic_block bb = BLOCK_FOR_INSN (insn);
16908 if (NONDEBUG_INSN_P (insn))
16910 for (use = DF_INSN_USES (insn); *use; use++)
16911 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
16914 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
16918 if (insn == BB_END (bb))
16921 insn = NEXT_INSN (insn);
16924 live = df_get_live_out (bb);
16925 return !REGNO_REG_SET_P (live, FLAGS_REG);
16928 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16929 move and add to avoid AGU stalls. */
16932 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
16934 unsigned int regno0 = true_regnum (operands[0]);
16935 unsigned int regno1 = true_regnum (operands[1]);
16936 unsigned int regno2 = true_regnum (operands[2]);
16938 /* Check if we need to optimize. */
16939 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16942 /* Check that it is correct to split here. */
16943 if (!ix86_ok_to_clobber_flags (insn))
16946 /* We need to split only adds with a non-destructive
16947 destination operand. */
16948 if (regno0 == regno1 || regno0 == regno2)
16951 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
16954 /* Return true if we should emit lea instruction instead of mov instruction. */
16958 ix86_use_lea_for_mov (rtx insn, rtx operands[])
16960 unsigned int regno0;
16961 unsigned int regno1;
16963 /* Check if we need to optimize. */
16964 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16967 /* Use lea for reg to reg moves only. */
16968 if (!REG_P (operands[0]) || !REG_P (operands[1]))
16971 regno0 = true_regnum (operands[0]);
16972 regno1 = true_regnum (operands[1]);
16974 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0);
16977 /* Return true if we need to split lea into a sequence of
16978 instructions to avoid AGU stalls. */
16981 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
16983 unsigned int regno0 = true_regnum (operands[0]);
16984 unsigned int regno1 = INVALID_REGNUM;
16985 unsigned int regno2 = INVALID_REGNUM;
16986 int split_cost = 0;
16987 struct ix86_address parts;
16990 /* FIXME: Handle zero-extended addresses. */
16991 if (SImode_address_operand (operands[1], VOIDmode))
16994 /* Check if we need to optimize. */
16995 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16998 /* The "at least two components" test below might not catch simple
16999 move insns if parts.base is non-NULL and parts.disp is const0_rtx
17000 as the only components in the address, e.g. if the register is
17001 %rbp or %r13. As this test is much cheaper and moves are the
17002 common case, do this check first. */
17003 if (REG_P (operands[1]))
17006 /* Check if it is OK to split here. */
17007 if (!ix86_ok_to_clobber_flags (insn))
17010 ok = ix86_decompose_address (operands[1], &parts);
17013 /* There should be at least two components in the address. */
17014 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
17015 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
17018 /* We should not split into add if a non-legitimate PIC
17019 operand is used as the displacement. */
17020 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
17024 regno1 = true_regnum (parts.base);
17026 regno2 = true_regnum (parts.index);
17028 /* Compute how many cycles we will add to execution time
17029 if we split the lea into a sequence of instructions. */
17030 if (parts.base || parts.index)
17032 /* Have to use mov instruction if the non-destructive
17033 destination form is used. */
17034 if (regno1 != regno0 && regno2 != regno0)
17037 /* Have to add index to base if both exist. */
17038 if (parts.base && parts.index)
17041 /* Have to use shift and adds if scale is 2 or greater. */
17042 if (parts.scale > 1)
17044 if (regno0 != regno1)
17046 else if (regno2 == regno0)
17049 split_cost += parts.scale;
17052 /* Have to use add instruction with an immediate if
17053 disp is nonzero. */
17054 if (parts.disp && parts.disp != const0_rtx)
17057 /* Subtract the price of lea. */
17061 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
17064 /* Emit x86 binary operand CODE in mode MODE, where the first operand
17065 matches destination. RTX includes clobber of FLAGS_REG. */
17068 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
17073 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
17074 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17076 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17079 /* Split lea instructions into a sequence of instructions
17080 which are executed on the ALU to avoid AGU stalls.
17081 It is assumed that it is allowed to clobber the flags register
17082 at the lea position. */
17085 ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
17087 unsigned int regno0 = true_regnum (operands[0]);
17088 unsigned int regno1 = INVALID_REGNUM;
17089 unsigned int regno2 = INVALID_REGNUM;
17090 struct ix86_address parts;
17094 ok = ix86_decompose_address (operands[1], &parts);
17099 if (GET_MODE (parts.base) != mode)
17100 parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
17101 regno1 = true_regnum (parts.base);
17106 if (GET_MODE (parts.index) != mode)
17107 parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
17108 regno2 = true_regnum (parts.index);
17111 if (parts.scale > 1)
17113 /* Case r1 = r1 + ... */
17114 if (regno1 == regno0)
17116 /* If we have a case r1 = r1 + C * r1 then we
17117 should use multiplication, which is very
17118 expensive. Assume the cost model is wrong if we
17119 have such a case here. */
17120 gcc_assert (regno2 != regno0);
17122 for (adds = parts.scale; adds > 0; adds--)
17123 ix86_emit_binop (PLUS, mode, operands[0], parts.index);
17127 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17128 if (regno0 != regno2)
17129 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
17131 /* Use shift for scaling. */
17132 ix86_emit_binop (ASHIFT, mode, operands[0],
17133 GEN_INT (exact_log2 (parts.scale)));
17136 ix86_emit_binop (PLUS, mode, operands[0], parts.base);
17138 if (parts.disp && parts.disp != const0_rtx)
17139 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
17142 else if (!parts.base && !parts.index)
17144 gcc_assert (parts.disp);
17145 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
17151 if (regno0 != regno2)
17152 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
17154 else if (!parts.index)
17156 if (regno0 != regno1)
17157 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
17161 if (regno0 == regno1)
17163 else if (regno0 == regno2)
17167 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
17171 ix86_emit_binop (PLUS, mode, operands[0], tmp);
17174 if (parts.disp && parts.disp != const0_rtx)
17175 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
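/* Illustrative sketch (not part of GCC): the rewrite performed above
   for, e.g., "lea 8(%ebx,%ecx,4), %eax", mirroring the split_cost
   accounting in ix86_avoid_lea_for_addr:
     mov %ecx, %eax   (copy index when regno0 != regno2)
     shl $2, %eax     (power-of-two scale becomes a shift)
     add %ebx, %eax   (add base)
     add $8, %eax     (add displacement)  */
#if 0
#include <stdio.h>

static unsigned int
lea_by_alu (unsigned int base, unsigned int index,
            unsigned int scale_log2, unsigned int disp)
{
  unsigned int r = index; /* mov index, r */
  r <<= scale_log2;       /* shl $log2, r */
  r += base;              /* add base, r  */
  r += disp;              /* add $disp, r */
  return r;
}

int
main (void)
{
  printf ("%u\n", lea_by_alu (100, 3, 2, 8)); /* 100 + 3*4 + 8 = 120 */
  return 0;
}
#endif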
17179 /* Return true if it is ok to optimize an ADD operation to LEA
17180 operation to avoid flag register consumption. For most processors,
17181 ADD is faster than LEA. For processors like ATOM, if the
17182 destination register of LEA holds an actual address which will be
17183 used soon, LEA is better and otherwise ADD is better. */
17186 ix86_lea_for_add_ok (rtx insn, rtx operands[])
17188 unsigned int regno0 = true_regnum (operands[0]);
17189 unsigned int regno1 = true_regnum (operands[1]);
17190 unsigned int regno2 = true_regnum (operands[2]);
17192 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17193 if (regno0 != regno1 && regno0 != regno2)
17196 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17199 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
17202 /* Return true if destination reg of SET_BODY is shift count of USE_BODY. */
17206 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
17212 /* Retrieve destination of SET_BODY. */
17213 switch (GET_CODE (set_body))
17216 set_dest = SET_DEST (set_body);
17217 if (!set_dest || !REG_P (set_dest))
17221 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
17222 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
17230 /* Retrieve shift count of USE_BODY. */
17231 switch (GET_CODE (use_body))
17234 shift_rtx = XEXP (use_body, 1);
17237 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
17238 if (ix86_dep_by_shift_count_body (set_body,
17239 XVECEXP (use_body, 0, i)))
17247 && (GET_CODE (shift_rtx) == ASHIFT
17248 || GET_CODE (shift_rtx) == LSHIFTRT
17249 || GET_CODE (shift_rtx) == ASHIFTRT
17250 || GET_CODE (shift_rtx) == ROTATE
17251 || GET_CODE (shift_rtx) == ROTATERT))
17253 rtx shift_count = XEXP (shift_rtx, 1);
17255 /* Return true if shift count is dest of SET_BODY. */
17256 if (REG_P (shift_count)
17257 && true_regnum (set_dest) == true_regnum (shift_count))
17264 /* Return true if destination reg of SET_INSN is shift count of USE_INSN. */
17268 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
17270 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
17271 PATTERN (use_insn));
17274 /* Return TRUE or FALSE depending on whether the unary operator meets the
17275 appropriate constraints. */
17278 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
17279 enum machine_mode mode ATTRIBUTE_UNUSED,
17280 rtx operands[2] ATTRIBUTE_UNUSED)
17282 /* If one of the operands is memory, source and destination must match. */
17283 if ((MEM_P (operands[0])
17284 || MEM_P (operands[1]))
17285 && ! rtx_equal_p (operands[0], operands[1]))
17290 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17291 are ok, keeping in mind the possible movddup alternative. */
17294 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
17296 if (MEM_P (operands[0]))
17297 return rtx_equal_p (operands[0], operands[1 + high]);
17298 if (MEM_P (operands[1]) && MEM_P (operands[2]))
17299 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
17303 /* Post-reload splitter for converting an SF or DFmode value in an
17304 SSE register into an unsigned SImode. */
17307 ix86_split_convert_uns_si_sse (rtx operands[])
17309 enum machine_mode vecmode;
17310 rtx value, large, zero_or_two31, input, two31, x;
17312 large = operands[1];
17313 zero_or_two31 = operands[2];
17314 input = operands[3];
17315 two31 = operands[4];
17316 vecmode = GET_MODE (large);
17317 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
17319 /* Load up the value into the low element. We must ensure that the other
17320 elements are valid floats -- zero is the easiest such value. */
17323 if (vecmode == V4SFmode)
17324 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
17326 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
17330 input = gen_rtx_REG (vecmode, REGNO (input));
17331 emit_move_insn (value, CONST0_RTX (vecmode));
17332 if (vecmode == V4SFmode)
17333 emit_insn (gen_sse_movss (value, value, input));
17335 emit_insn (gen_sse2_movsd (value, value, input));
17338 emit_move_insn (large, two31);
17339 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
17341 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
17342 emit_insn (gen_rtx_SET (VOIDmode, large, x));
17344 x = gen_rtx_AND (vecmode, zero_or_two31, large);
17345 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
17347 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
17348 emit_insn (gen_rtx_SET (VOIDmode, value, x));
17350 large = gen_rtx_REG (V4SImode, REGNO (large));
17351 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
17353 x = gen_rtx_REG (V4SImode, REGNO (value));
17354 if (vecmode == V4SFmode)
17355 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
17357 emit_insn (gen_sse2_cvttpd2dq (x, value));
17360 emit_insn (gen_xorv4si3 (value, value, large));
17363 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17364 Expects the 64-bit DImode to be supplied in a pair of integral
17365 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17366 -mfpmath=sse, !optimize_size only. */
17369 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
17371 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
17372 rtx int_xmm, fp_xmm;
17373 rtx biases, exponents;
17376 int_xmm = gen_reg_rtx (V4SImode);
17377 if (TARGET_INTER_UNIT_MOVES)
17378 emit_insn (gen_movdi_to_sse (int_xmm, input));
17379 else if (TARGET_SSE_SPLIT_REGS)
17381 emit_clobber (int_xmm);
17382 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
17386 x = gen_reg_rtx (V2DImode);
17387 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
17388 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
17391 x = gen_rtx_CONST_VECTOR (V4SImode,
17392 gen_rtvec (4, GEN_INT (0x43300000UL),
17393 GEN_INT (0x45300000UL),
17394 const0_rtx, const0_rtx));
17395 exponents = validize_mem (force_const_mem (V4SImode, x));
17397 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17398 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
17400 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17401 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17402 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17403 (0x1.0p84 + double(fp_value_hi_xmm)).
17404 Note these exponents differ by 32. */
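/* Worked example: for the input 0x0000000100000005 (hi = 1, lo = 5)
   the two halves become the doubles 0x1.0p52 + 5.0 and
   0x1.0p84 + 1.0 * 0x1.0p32; after the bias subtractions below they
   are exactly 5.0 and 0x1.0p32, which sum to the expected
   4294967301.0. */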
17406 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
17408 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17409 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17410 real_ldexp (&bias_lo_rvt, &dconst1, 52);
17411 real_ldexp (&bias_hi_rvt, &dconst1, 84);
17412 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
17413 x = const_double_from_real_value (bias_hi_rvt, DFmode);
17414 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
17415 biases = validize_mem (force_const_mem (V2DFmode, biases));
17416 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
17418 /* Add the upper and lower DFmode values together. */
17420 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
17423 x = copy_to_mode_reg (V2DFmode, fp_xmm);
17424 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
17425 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
17428 ix86_expand_vector_extract (false, target, fp_xmm, 0);
17431 /* Not used, but eases macroization of patterns. */
17433 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
17434 rtx input ATTRIBUTE_UNUSED)
17436 gcc_unreachable ();
17439 /* Convert an unsigned SImode value into a DFmode. Only currently used
17440 for SSE, but applicable anywhere. */
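/* The expansion below relies on the identity
   (double) (int) (x + 0x80000000u) + 0x1.0p31 == (double) x
   for every unsigned 32-bit x; e.g. x = 0xffffffff wraps to
   0x7fffffff, and 2147483647.0 + 2147483648.0 == 4294967295.0. */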
17443 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
17445 REAL_VALUE_TYPE TWO31r;
17448 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
17449 NULL, 1, OPTAB_DIRECT);
17451 fp = gen_reg_rtx (DFmode);
17452 emit_insn (gen_floatsidf2 (fp, x));
17454 real_ldexp (&TWO31r, &dconst1, 31);
17455 x = const_double_from_real_value (TWO31r, DFmode);
17457 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
17459 emit_move_insn (target, x);
17462 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17463 32-bit mode; otherwise we have a direct convert instruction. */
17466 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
17468 REAL_VALUE_TYPE TWO32r;
17469 rtx fp_lo, fp_hi, x;
17471 fp_lo = gen_reg_rtx (DFmode);
17472 fp_hi = gen_reg_rtx (DFmode);
17474 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
17476 real_ldexp (&TWO32r, &dconst1, 32);
17477 x = const_double_from_real_value (TWO32r, DFmode);
17478 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
17480 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
17482 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
17485 emit_move_insn (target, x);
17488 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17489 For x86_32, -mfpmath=sse, !optimize_size only. */
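/* Each 16-bit half converts to SFmode exactly and the multiply by
   0x1.0p16 is exact, so the final addition rounds only once and the
   result is the correctly rounded SFmode value of the 32-bit input;
   e.g. 0x12345678 splits into hi = 0x1234 and lo = 0x5678, recombined
   as 0x1234 * 0x1.0p16 + 0x5678. */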
17491 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
17493 REAL_VALUE_TYPE ONE16r;
17494 rtx fp_hi, fp_lo, int_hi, int_lo, x;
17496 real_ldexp (&ONE16r, &dconst1, 16);
17497 x = const_double_from_real_value (ONE16r, SFmode);
17498 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
17499 NULL, 0, OPTAB_DIRECT);
17500 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
17501 NULL, 0, OPTAB_DIRECT);
17502 fp_hi = gen_reg_rtx (SFmode);
17503 fp_lo = gen_reg_rtx (SFmode);
17504 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
17505 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
17506 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
17508 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
17510 if (!rtx_equal_p (target, fp_hi))
17511 emit_move_insn (target, fp_hi);
17514 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17515 a vector of unsigned ints VAL to a vector of floats TARGET. */
17518 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
17521 REAL_VALUE_TYPE TWO16r;
17522 enum machine_mode intmode = GET_MODE (val);
17523 enum machine_mode fltmode = GET_MODE (target);
17524 rtx (*cvt) (rtx, rtx);
17526 if (intmode == V4SImode)
17527 cvt = gen_floatv4siv4sf2;
17529 cvt = gen_floatv8siv8sf2;
17530 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
17531 tmp[0] = force_reg (intmode, tmp[0]);
17532 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
17534 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
17535 NULL_RTX, 1, OPTAB_DIRECT);
17536 tmp[3] = gen_reg_rtx (fltmode);
17537 emit_insn (cvt (tmp[3], tmp[1]));
17538 tmp[4] = gen_reg_rtx (fltmode);
17539 emit_insn (cvt (tmp[4], tmp[2]));
17540 real_ldexp (&TWO16r, &dconst1, 16);
17541 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
17542 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
17543 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
17545 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
17547 if (tmp[7] != target)
17548 emit_move_insn (target, tmp[7]);
17551 /* Adjust a V*SFmode/V*DFmode value VAL so that the *sfix_trunc* resp. fix_trunc*
17552 pattern can be used on it instead of the *ufix_trunc* resp. fixuns_trunc*.
17553 This is done by doing just a signed conversion if < 0x1p31, and otherwise by
17554 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
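/* Worked example for a 32-bit element: 3000000000.0 is >= 0x1p31, so
   its mask element is all ones, subtracting 0x1p31 leaves 852516352.0,
   the signed fix gives 852516352, and xoring in the 0x80000000 from
   *XORP restores 3000000000. Elements below 0x1p31 are fixed
   directly. */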
17557 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
17559 REAL_VALUE_TYPE TWO31r;
17560 rtx two31r, tmp[4];
17561 enum machine_mode mode = GET_MODE (val);
17562 enum machine_mode scalarmode = GET_MODE_INNER (mode);
17563 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
17564 rtx (*cmp) (rtx, rtx, rtx, rtx);
17567 for (i = 0; i < 3; i++)
17568 tmp[i] = gen_reg_rtx (mode);
17569 real_ldexp (&TWO31r, &dconst1, 31);
17570 two31r = const_double_from_real_value (TWO31r, scalarmode);
17571 two31r = ix86_build_const_vector (mode, 1, two31r);
17572 two31r = force_reg (mode, two31r);
17575 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
17576 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
17577 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
17578 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
17579 default: gcc_unreachable ();
17581 tmp[3] = gen_rtx_LE (mode, two31r, val);
17582 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
17583 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
17585 if (intmode == V4SImode || TARGET_AVX2)
17586 *xorp = expand_simple_binop (intmode, ASHIFT,
17587 gen_lowpart (intmode, tmp[0]),
17588 GEN_INT (31), NULL_RTX, 0,
17592 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
17593 two31 = ix86_build_const_vector (intmode, 1, two31);
17594 *xorp = expand_simple_binop (intmode, AND,
17595 gen_lowpart (intmode, tmp[0]),
17596 two31, NULL_RTX, 0,
17599 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
17603 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17604 then replicate the value for all elements of the vector register. */
17608 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
17612 enum machine_mode scalar_mode;
17629 n_elt = GET_MODE_NUNITS (mode);
17630 v = rtvec_alloc (n_elt);
17631 scalar_mode = GET_MODE_INNER (mode);
17633 RTVEC_ELT (v, 0) = value;
17635 for (i = 1; i < n_elt; ++i)
17636 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
17638 return gen_rtx_CONST_VECTOR (mode, v);
17641 gcc_unreachable ();
17645 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17646 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17647 for an SSE register. If VECT is true, then replicate the mask for
17648 all elements of the vector register. If INVERT is true, then create
17649 a mask excluding the sign bit. */
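/* For example, in DFmode the mask is 0x8000000000000000 (only the sign
   bit set), or 0x7fffffffffffffff when INVERT is true; NEG is then a
   single XOR with the former and ABS a single AND with the latter. */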
17652 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
17654 enum machine_mode vec_mode, imode;
17655 HOST_WIDE_INT hi, lo;
17660 /* Find the sign bit, sign extended to 2*HWI. */
17668 mode = GET_MODE_INNER (mode);
17670 lo = 0x80000000, hi = lo < 0;
17678 mode = GET_MODE_INNER (mode);
17680 if (HOST_BITS_PER_WIDE_INT >= 64)
17681 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
17683 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17688 vec_mode = VOIDmode;
17689 if (HOST_BITS_PER_WIDE_INT >= 64)
17692 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
17699 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17703 lo = ~lo, hi = ~hi;
17709 mask = immed_double_const (lo, hi, imode);
17711 vec = gen_rtvec (2, v, mask);
17712 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
17713 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
17720 gcc_unreachable ();
17724 lo = ~lo, hi = ~hi;
17726 /* Force this value into the low part of a fp vector constant. */
17727 mask = immed_double_const (lo, hi, imode);
17728 mask = gen_lowpart (mode, mask);
17730 if (vec_mode == VOIDmode)
17731 return force_reg (mode, mask);
17733 v = ix86_build_const_vector (vec_mode, vect, mask);
17734 return force_reg (vec_mode, v);
17737 /* Generate code for floating point ABS or NEG. */
17740 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
17743 rtx mask, set, dst, src;
17744 bool use_sse = false;
17745 bool vector_mode = VECTOR_MODE_P (mode);
17746 enum machine_mode vmode = mode;
17750 else if (mode == TFmode)
17752 else if (TARGET_SSE_MATH)
17754 use_sse = SSE_FLOAT_MODE_P (mode);
17755 if (mode == SFmode)
17757 else if (mode == DFmode)
17761 /* NEG and ABS performed with SSE use bitwise mask operations.
17762 Create the appropriate mask now. */
17764 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
17771 set = gen_rtx_fmt_e (code, mode, src);
17772 set = gen_rtx_SET (VOIDmode, dst, set);
17779 use = gen_rtx_USE (VOIDmode, mask);
17781 par = gen_rtvec (2, set, use);
17784 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17785 par = gen_rtvec (3, set, use, clob);
17787 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
17793 /* Expand a copysign operation. Special case operand 0 being a constant. */
17796 ix86_expand_copysign (rtx operands[])
17798 enum machine_mode mode, vmode;
17799 rtx dest, op0, op1, mask, nmask;
17801 dest = operands[0];
17805 mode = GET_MODE (dest);
17807 if (mode == SFmode)
17809 else if (mode == DFmode)
17814 if (GET_CODE (op0) == CONST_DOUBLE)
17816 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
17818 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17819 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17821 if (mode == SFmode || mode == DFmode)
17823 if (op0 == CONST0_RTX (mode))
17824 op0 = CONST0_RTX (vmode);
17827 rtx v = ix86_build_const_vector (vmode, false, op0);
17829 op0 = force_reg (vmode, v);
17832 else if (op0 != CONST0_RTX (mode))
17833 op0 = force_reg (mode, op0);
17835 mask = ix86_build_signbit_mask (vmode, 0, 0);
17837 if (mode == SFmode)
17838 copysign_insn = gen_copysignsf3_const;
17839 else if (mode == DFmode)
17840 copysign_insn = gen_copysigndf3_const;
17842 copysign_insn = gen_copysigntf3_const;
17844 emit_insn (copysign_insn (dest, op0, op1, mask));
17848 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17850 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17851 mask = ix86_build_signbit_mask (vmode, 0, 0);
17853 if (mode == SFmode)
17854 copysign_insn = gen_copysignsf3_var;
17855 else if (mode == DFmode)
17856 copysign_insn = gen_copysigndf3_var;
17858 copysign_insn = gen_copysigntf3_var;
17860 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17864 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17865 be a constant, and so has already been expanded into a vector constant. */
17868 ix86_split_copysign_const (rtx operands[])
17870 enum machine_mode mode, vmode;
17871 rtx dest, op0, mask, x;
17873 dest = operands[0];
17875 mask = operands[3];
17877 mode = GET_MODE (dest);
17878 vmode = GET_MODE (mask);
17880 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17881 x = gen_rtx_AND (vmode, dest, mask);
17882 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17884 if (op0 != CONST0_RTX (vmode))
17886 x = gen_rtx_IOR (vmode, dest, op0);
17887 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17891 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17892 so we have to do two masks. */
17895 ix86_split_copysign_var (rtx operands[])
17897 enum machine_mode mode, vmode;
17898 rtx dest, scratch, op0, op1, mask, nmask, x;
17900 dest = operands[0];
17901 scratch = operands[1];
17904 nmask = operands[4];
17905 mask = operands[5];
17907 mode = GET_MODE (dest);
17908 vmode = GET_MODE (mask);
17910 if (rtx_equal_p (op0, op1))
17912 /* Shouldn't happen often (it's useless, obviously), but when it does
17913 we'd generate incorrect code if we continue below. */
17914 emit_move_insn (dest, op0);
17918 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17920 gcc_assert (REGNO (op1) == REGNO (scratch));
17922 x = gen_rtx_AND (vmode, scratch, mask);
17923 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17926 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17927 x = gen_rtx_NOT (vmode, dest);
17928 x = gen_rtx_AND (vmode, x, op0);
17929 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17933 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17935 x = gen_rtx_AND (vmode, scratch, mask);
17937 else /* alternative 2,4 */
17939 gcc_assert (REGNO (mask) == REGNO (scratch));
17940 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17941 x = gen_rtx_AND (vmode, scratch, op1);
17943 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17945 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17947 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17948 x = gen_rtx_AND (vmode, dest, nmask);
17950 else /* alternative 3,4 */
17952 gcc_assert (REGNO (nmask) == REGNO (dest));
17954 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17955 x = gen_rtx_AND (vmode, dest, op0);
17957 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17960 x = gen_rtx_IOR (vmode, dest, scratch);
17961 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17964 /* Return TRUE or FALSE depending on whether the first SET in INSN
17965 has source and destination with matching CC modes and the
17966 CC mode is at least as constrained as REQ_MODE. */
17969 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17972 enum machine_mode set_mode;
17974 set = PATTERN (insn);
17975 if (GET_CODE (set) == PARALLEL)
17976 set = XVECEXP (set, 0, 0);
17977 gcc_assert (GET_CODE (set) == SET);
17978 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17980 set_mode = GET_MODE (SET_DEST (set));
17984 if (req_mode != CCNOmode
17985 && (req_mode != CCmode
17986 || XEXP (SET_SRC (set), 1) != const0_rtx))
17990 if (req_mode == CCGCmode)
17994 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17998 if (req_mode == CCZmode)
18008 if (set_mode != req_mode)
18013 gcc_unreachable ();
18016 return GET_MODE (SET_SRC (set)) == set_mode;
18019 /* Generate insn patterns to do an integer compare of OPERANDS. */
18022 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
18024 enum machine_mode cmpmode;
18027 cmpmode = SELECT_CC_MODE (code, op0, op1);
18028 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
18030 /* This is very simple, but making the interface the same as in the
18031 FP case makes the rest of the code easier. */
18032 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
18033 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
18035 /* Return the test that should be put into the flags user, i.e.
18036 the bcc, scc, or cmov instruction. */
18037 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
18040 /* Figure out whether to use ordered or unordered fp comparisons.
18041 Return the appropriate mode to use. */
18044 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
18046 /* ??? In order to make all comparisons reversible, we do all comparisons
18047 non-trapping when compiling for IEEE. Once gcc is able to distinguish
18048 all forms of trapping and nontrapping comparisons, we can make inequality
18049 comparisons trapping again, since that results in better code when using
18050 FCOM based compares. */
18051 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
18055 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
18057 enum machine_mode mode = GET_MODE (op0);
18059 if (SCALAR_FLOAT_MODE_P (mode))
18061 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18062 return ix86_fp_compare_mode (code);
18067 /* Only zero flag is needed. */
18068 case EQ: /* ZF=0 */
18069 case NE: /* ZF!=0 */
18071 /* Codes needing carry flag. */
18072 case GEU: /* CF=0 */
18073 case LTU: /* CF=1 */
18074 /* Detect overflow checks. They need just the carry flag. */
18075 if (GET_CODE (op0) == PLUS
18076 && rtx_equal_p (op1, XEXP (op0, 0)))
18080 case GTU: /* CF=0 & ZF=0 */
18081 case LEU: /* CF=1 | ZF=1 */
18083 /* Codes possibly doable only with sign flag when
18084 comparing against zero. */
18085 case GE: /* SF=OF or SF=0 */
18086 case LT: /* SF<>OF or SF=1 */
18087 if (op1 == const0_rtx)
18090 /* For other cases Carry flag is not required. */
18092 /* Codes doable only with the sign flag when comparing
18093 against zero, but we lack a jump instruction for it,
18094 so we need to use relational tests against overflow,
18095 which thus needs to be zero. */
18096 case GT: /* ZF=0 & SF=OF */
18097 case LE: /* ZF=1 | SF<>OF */
18098 if (op1 == const0_rtx)
18102 /* strcmp patterns do (use flags) and combine may ask us for a proper mode. */
18107 gcc_unreachable ();
18111 /* Return the fixed registers used for condition codes. */
18114 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
18121 /* If two condition code modes are compatible, return a condition code
18122 mode which is compatible with both. Otherwise, return VOIDmode. */
18125 static enum machine_mode
18126 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
18131 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
18134 if ((m1 == CCGCmode && m2 == CCGOCmode)
18135 || (m1 == CCGOCmode && m2 == CCGCmode))
18141 gcc_unreachable ();
18171 /* These are only compatible with themselves, which we already checked above. */
18178 /* Return a comparison we can do that is equivalent to
18179 swap_condition (code), apart possibly from orderedness.
18180 But never change orderedness if TARGET_IEEE_FP, returning
18181 UNKNOWN in that case if necessary. */
18183 static enum rtx_code
18184 ix86_fp_swap_condition (enum rtx_code code)
18188 case GT: /* GTU - CF=0 & ZF=0 */
18189 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
18190 case GE: /* GEU - CF=0 */
18191 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
18192 case UNLT: /* LTU - CF=1 */
18193 return TARGET_IEEE_FP ? UNKNOWN : GT;
18194 case UNLE: /* LEU - CF=1 | ZF=1 */
18195 return TARGET_IEEE_FP ? UNKNOWN : GE;
18197 return swap_condition (code);
18201 /* Return the cost of comparison CODE using the best strategy for
18202 performance. All the following functions use the number of
18203 instructions as the cost metric. In the future this should be tweaked
18204 to compute bytes for optimize_size and to take into account the performance of various instructions on various CPUs. */
18207 ix86_fp_comparison_cost (enum rtx_code code)
18211 /* The cost of code using bit-twiddling on %ah. */
18228 arith_cost = TARGET_IEEE_FP ? 5 : 4;
18232 arith_cost = TARGET_IEEE_FP ? 6 : 4;
18235 gcc_unreachable ();
18238 switch (ix86_fp_comparison_strategy (code))
18240 case IX86_FPCMP_COMI:
18241 return arith_cost > 4 ? 3 : 2;
18242 case IX86_FPCMP_SAHF:
18243 return arith_cost > 4 ? 4 : 3;
18249 /* Return the strategy to use for floating-point comparisons. We assume
18250 that fcomi is always preferable where available, since that is also true
18251 when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18253 enum ix86_fpcmp_strategy
18254 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
18256 /* Do fcomi/sahf based test when profitable. */
18259 return IX86_FPCMP_COMI;
18261 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
18262 return IX86_FPCMP_SAHF;
18264 return IX86_FPCMP_ARITH;
18267 /* Swap, force into registers, or otherwise massage the two operands
18268 to a fp comparison. The operands are updated in place; the new
18269 comparison code is returned. */
18271 static enum rtx_code
18272 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
18274 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
18275 rtx op0 = *pop0, op1 = *pop1;
18276 enum machine_mode op_mode = GET_MODE (op0);
18277 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
18279 /* All of the unordered compare instructions only work on registers.
18280 The same is true of the fcomi compare instructions. The XFmode
18281 compare instructions require registers except when comparing
18282 against zero or when converting operand 1 from fixed point to floating point. */
18286 && (fpcmp_mode == CCFPUmode
18287 || (op_mode == XFmode
18288 && ! (standard_80387_constant_p (op0) == 1
18289 || standard_80387_constant_p (op1) == 1)
18290 && GET_CODE (op1) != FLOAT)
18291 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
18293 op0 = force_reg (op_mode, op0);
18294 op1 = force_reg (op_mode, op1);
18298 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18299 things around if they appear profitable, otherwise force op0
18300 into a register. */
18302 if (standard_80387_constant_p (op0) == 0
18304 && ! (standard_80387_constant_p (op1) == 0
18307 enum rtx_code new_code = ix86_fp_swap_condition (code);
18308 if (new_code != UNKNOWN)
18311 tmp = op0, op0 = op1, op1 = tmp;
18317 op0 = force_reg (op_mode, op0);
18319 if (CONSTANT_P (op1))
18321 int tmp = standard_80387_constant_p (op1);
18323 op1 = validize_mem (force_const_mem (op_mode, op1));
18327 op1 = force_reg (op_mode, op1);
18330 op1 = force_reg (op_mode, op1);
18334 /* Try to rearrange the comparison to make it cheaper. */
18335 if (ix86_fp_comparison_cost (code)
18336 > ix86_fp_comparison_cost (swap_condition (code))
18337 && (REG_P (op1) || can_create_pseudo_p ()))
18340 tmp = op0, op0 = op1, op1 = tmp;
18341 code = swap_condition (code);
18343 op0 = force_reg (op_mode, op0);
18351 /* Convert comparison codes we use to represent FP comparison to integer
18352 code that will result in a proper branch. Return UNKNOWN if no such code is available. */
18356 ix86_fp_compare_code_to_integer (enum rtx_code code)
18385 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18388 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
18390 enum machine_mode fpcmp_mode, intcmp_mode;
18393 fpcmp_mode = ix86_fp_compare_mode (code);
18394 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
18396 /* Do fcomi/sahf based test when profitable. */
18397 switch (ix86_fp_comparison_strategy (code))
18399 case IX86_FPCMP_COMI:
18400 intcmp_mode = fpcmp_mode;
18401 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18402 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18407 case IX86_FPCMP_SAHF:
18408 intcmp_mode = fpcmp_mode;
18409 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18410 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18414 scratch = gen_reg_rtx (HImode);
18415 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
18416 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
18419 case IX86_FPCMP_ARITH:
18420 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18421 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18422 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
18424 scratch = gen_reg_rtx (HImode);
18425 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
18427 /* In the unordered case, we have to check C2 for NaNs, which
18428 doesn't happen to work out to anything nice combination-wise.
18429 So do some bit twiddling on the value we've got in AH to come
18430 up with an appropriate set of condition codes. */
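/* After fnstsw, AH holds C0 in bit 0 (0x01), C2 in bit 2 (0x04) and C3
   in bit 6 (0x40), so 0x45 masks all three at once; fcom sets C3:C2:C0
   to 000, 001, 100 or 111 for >, <, == and unordered respectively. */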
18432 intcmp_mode = CCNOmode;
18437 if (code == GT || !TARGET_IEEE_FP)
18439 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
18444 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18445 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18446 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
18447 intcmp_mode = CCmode;
18453 if (code == LT && TARGET_IEEE_FP)
18455 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18456 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
18457 intcmp_mode = CCmode;
18462 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
18468 if (code == GE || !TARGET_IEEE_FP)
18470 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
18475 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18476 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
18482 if (code == LE && TARGET_IEEE_FP)
18484 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18485 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18486 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18487 intcmp_mode = CCmode;
18492 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
18498 if (code == EQ && TARGET_IEEE_FP)
18500 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18501 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18502 intcmp_mode = CCmode;
18507 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
18513 if (code == NE && TARGET_IEEE_FP)
18515 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18516 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
18522 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
18528 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
18532 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
18537 gcc_unreachable ();
18545 /* Return the test that should be put into the flags user, i.e.
18546 the bcc, scc, or cmov instruction. */
18547 return gen_rtx_fmt_ee (code, VOIDmode,
18548 gen_rtx_REG (intcmp_mode, FLAGS_REG),
18553 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
18557 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
18558 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
18560 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
18562 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
18563 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18566 ret = ix86_expand_int_compare (code, op0, op1);
18572 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
18574 enum machine_mode mode = GET_MODE (op0);
18586 tmp = ix86_expand_compare (code, op0, op1);
18587 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18588 gen_rtx_LABEL_REF (VOIDmode, label),
18590 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18597 /* Expand DImode branch into multiple compare+branch. */
18599 rtx lo[2], hi[2], label2;
18600 enum rtx_code code1, code2, code3;
18601 enum machine_mode submode;
18603 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
18605 tmp = op0, op0 = op1, op1 = tmp;
18606 code = swap_condition (code);
18609 split_double_mode (mode, &op0, 1, lo+0, hi+0);
18610 split_double_mode (mode, &op1, 1, lo+1, hi+1);
18612 submode = mode == DImode ? SImode : DImode;
18614 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18615 avoid two branches. This costs one extra insn, so disable when
18616 optimizing for size. */
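/* That is, emit the branchless equivalent of
   if (((hi0 ^ hi1) | (lo0 ^ lo1)) == 0) goto label;
   for EQ (inverted for NE), with the xors against constant-zero
   halves optimized away below. */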
18618 if ((code == EQ || code == NE)
18619 && (!optimize_insn_for_size_p ()
18620 || hi[1] == const0_rtx || lo[1] == const0_rtx))
18625 if (hi[1] != const0_rtx)
18626 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
18627 NULL_RTX, 0, OPTAB_WIDEN);
18630 if (lo[1] != const0_rtx)
18631 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
18632 NULL_RTX, 0, OPTAB_WIDEN);
18634 tmp = expand_binop (submode, ior_optab, xor1, xor0,
18635 NULL_RTX, 0, OPTAB_WIDEN);
18637 ix86_expand_branch (code, tmp, const0_rtx, label);
18641 /* Otherwise, if we are doing less-than or greater-or-equal-than,
18642 op1 is a constant and the low word is zero, then we can just
18643 examine the high word. Similarly for a low word of -1 and
18644 less-or-equal-than or greater-than. */
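/* For example, the DImode test a <u 0x100000000 has lo[1] == 0 and
   hi[1] == 1, and reduces to the single word test hi(a) <u 1. */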
18646 if (CONST_INT_P (hi[1]))
18649 case LT: case LTU: case GE: case GEU:
18650 if (lo[1] == const0_rtx)
18652 ix86_expand_branch (code, hi[0], hi[1], label);
18656 case LE: case LEU: case GT: case GTU:
18657 if (lo[1] == constm1_rtx)
18659 ix86_expand_branch (code, hi[0], hi[1], label);
18667 /* Otherwise, we need two or three jumps. */
18669 label2 = gen_label_rtx ();
18672 code2 = swap_condition (code);
18673 code3 = unsigned_condition (code);
18677 case LT: case GT: case LTU: case GTU:
18680 case LE: code1 = LT; code2 = GT; break;
18681 case GE: code1 = GT; code2 = LT; break;
18682 case LEU: code1 = LTU; code2 = GTU; break;
18683 case GEU: code1 = GTU; code2 = LTU; break;
18685 case EQ: code1 = UNKNOWN; code2 = NE; break;
18686 case NE: code2 = UNKNOWN; break;
18689 gcc_unreachable ();
18694 * if (hi(a) < hi(b)) goto true;
18695 * if (hi(a) > hi(b)) goto false;
18696 * if (lo(a) < lo(b)) goto true;
18700 if (code1 != UNKNOWN)
18701 ix86_expand_branch (code1, hi[0], hi[1], label);
18702 if (code2 != UNKNOWN)
18703 ix86_expand_branch (code2, hi[0], hi[1], label2);
18705 ix86_expand_branch (code3, lo[0], lo[1], label);
18707 if (code2 != UNKNOWN)
18708 emit_label (label2);
18713 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
18718 /* Split branch based on floating point condition. */
18720 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18721 rtx target1, rtx target2, rtx tmp, rtx pushed)
18726 if (target2 != pc_rtx)
18729 code = reverse_condition_maybe_unordered (code);
18734 condition = ix86_expand_fp_compare (code, op1, op2,
18737 /* Remove pushed operand from stack. */
18739 ix86_free_from_memory (GET_MODE (pushed));
18741 i = emit_jump_insn (gen_rtx_SET
18743 gen_rtx_IF_THEN_ELSE (VOIDmode,
18744 condition, target1, target2)));
18745 if (split_branch_probability >= 0)
18746 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
18750 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18754 gcc_assert (GET_MODE (dest) == QImode);
18756 ret = ix86_expand_compare (code, op0, op1);
18757 PUT_MODE (ret, QImode);
18758 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
18761 /* Expand comparison setting or clearing carry flag. Return true when
18762 successful and set pop for the operation. */
18764 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18766 enum machine_mode mode =
18767 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18769 /* Do not handle double-mode compares that go through a special path. */
18770 if (mode == (TARGET_64BIT ? TImode : DImode))
18773 if (SCALAR_FLOAT_MODE_P (mode))
18775 rtx compare_op, compare_seq;
18777 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18779 /* Shortcut: the following common codes never translate
18780 into carry-flag compares. */
18781 if (code == EQ || code == NE || code == UNEQ || code == LTGT
18782 || code == ORDERED || code == UNORDERED)
18785 /* These comparisons require the zero flag; swap operands so they won't. */
18786 if ((code == GT || code == UNLE || code == LE || code == UNGT)
18787 && !TARGET_IEEE_FP)
18792 code = swap_condition (code);
18795 /* Try to expand the comparison and verify that we end up with
18796 a carry-flag-based comparison. This fails only when we decide
18797 to expand the comparison using arithmetic, which is not a
18798 common scenario. */
18800 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18801 compare_seq = get_insns ();
18804 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18805 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18806 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18808 code = GET_CODE (compare_op);
18810 if (code != LTU && code != GEU)
18813 emit_insn (compare_seq);
18818 if (!INTEGRAL_MODE_P (mode))
18827 /* Convert a==0 into (unsigned)a<1. */
18830 if (op1 != const0_rtx)
18833 code = (code == EQ ? LTU : GEU);
18836 /* Convert a>b into b<a or a>=b-1. */
18839 if (CONST_INT_P (op1))
18841 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18842 /* Bail out on overflow. We could still swap the operands, but
18843 that would force loading the constant into a register. */
18844 if (op1 == const0_rtx
18845 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18847 code = (code == GTU ? GEU : LTU);
18854 code = (code == GTU ? LTU : GEU);
18858 /* Convert a>=0 into (unsigned)a<0x80000000. */
18861 if (mode == DImode || op1 != const0_rtx)
18863 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18864 code = (code == LT ? GEU : LTU);
18868 if (mode == DImode || op1 != constm1_rtx)
18870 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18871 code = (code == LE ? GEU : LTU);
18877 /* Swapping operands may cause a constant to appear as the first operand. */
18878 if (!nonimmediate_operand (op0, VOIDmode))
18880 if (!can_create_pseudo_p ())
18882 op0 = force_reg (mode, op0);
18884 *pop = ix86_expand_compare (code, op0, op1);
18885 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
18890 ix86_expand_int_movcc (rtx operands[])
18892 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18893 rtx compare_seq, compare_op;
18894 enum machine_mode mode = GET_MODE (operands[0]);
18895 bool sign_bit_compare_p = false;
18896 rtx op0 = XEXP (operands[1], 0);
18897 rtx op1 = XEXP (operands[1], 1);
18900 compare_op = ix86_expand_compare (code, op0, op1);
18901 compare_seq = get_insns ();
18904 compare_code = GET_CODE (compare_op);
18906 if ((op1 == const0_rtx && (code == GE || code == LT))
18907 || (op1 == constm1_rtx && (code == GT || code == LE)))
18908 sign_bit_compare_p = true;
18910 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18911 HImode insns, we'd be swallowed in word prefix ops. */
18913 if ((mode != HImode || TARGET_FAST_PREFIX)
18914 && (mode != (TARGET_64BIT ? TImode : DImode))
18915 && CONST_INT_P (operands[2])
18916 && CONST_INT_P (operands[3]))
18918 rtx out = operands[0];
18919 HOST_WIDE_INT ct = INTVAL (operands[2]);
18920 HOST_WIDE_INT cf = INTVAL (operands[3]);
18921 HOST_WIDE_INT diff;
18924 /* Sign bit compares are better done using shifts than by using sbb. */
18926 if (sign_bit_compare_p
18927 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18929 /* Detect overlap between destination and compare sources. */
18932 if (!sign_bit_compare_p)
18935 bool fpcmp = false;
18937 compare_code = GET_CODE (compare_op);
18939 flags = XEXP (compare_op, 0);
18941 if (GET_MODE (flags) == CCFPmode
18942 || GET_MODE (flags) == CCFPUmode)
18946 = ix86_fp_compare_code_to_integer (compare_code);
18949 /* To simplify the rest of the code, restrict to the GEU case. */
18950 if (compare_code == LTU)
18952 HOST_WIDE_INT tmp = ct;
18955 compare_code = reverse_condition (compare_code);
18956 code = reverse_condition (code);
18961 PUT_CODE (compare_op,
18962 reverse_condition_maybe_unordered
18963 (GET_CODE (compare_op)));
18965 PUT_CODE (compare_op,
18966 reverse_condition (GET_CODE (compare_op)));
18970 if (reg_overlap_mentioned_p (out, op0)
18971 || reg_overlap_mentioned_p (out, op1))
18972 tmp = gen_reg_rtx (mode);
18974 if (mode == DImode)
18975 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18977 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18978 flags, compare_op));
18982 if (code == GT || code == GE)
18983 code = reverse_condition (code);
18986 HOST_WIDE_INT tmp = ct;
18991 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
19004 tmp = expand_simple_binop (mode, PLUS,
19006 copy_rtx (tmp), 1, OPTAB_DIRECT);
19017 tmp = expand_simple_binop (mode, IOR,
19019 copy_rtx (tmp), 1, OPTAB_DIRECT);
19021 else if (diff == -1 && ct)
19031 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
19033 tmp = expand_simple_binop (mode, PLUS,
19034 copy_rtx (tmp), GEN_INT (cf),
19035 copy_rtx (tmp), 1, OPTAB_DIRECT);
19043 * andl cf - ct, dest
19053 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
19056 tmp = expand_simple_binop (mode, AND,
19058 gen_int_mode (cf - ct, mode),
19059 copy_rtx (tmp), 1, OPTAB_DIRECT);
19061 tmp = expand_simple_binop (mode, PLUS,
19062 copy_rtx (tmp), GEN_INT (ct),
19063 copy_rtx (tmp), 1, OPTAB_DIRECT);
19066 if (!rtx_equal_p (tmp, out))
19067 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
19074 enum machine_mode cmp_mode = GET_MODE (op0);
19077 tmp = ct, ct = cf, cf = tmp;
19080 if (SCALAR_FLOAT_MODE_P (cmp_mode))
19082 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
19084 /* We may be reversing an unordered compare to a normal compare, which
19085 is not valid in general (we may convert a non-trapping condition
19086 to a trapping one); however, on i386 we currently emit all
19087 comparisons unordered. */
19088 compare_code = reverse_condition_maybe_unordered (compare_code);
19089 code = reverse_condition_maybe_unordered (code);
19093 compare_code = reverse_condition (compare_code);
19094 code = reverse_condition (code);
19098 compare_code = UNKNOWN;
19099 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
19100 && CONST_INT_P (op1))
19102 if (op1 == const0_rtx
19103 && (code == LT || code == GE))
19104 compare_code = code;
19105 else if (op1 == constm1_rtx)
19109 else if (code == GT)
19114 /* Optimize dest = (op0 < 0) ? -1 : cf. */
19115 if (compare_code != UNKNOWN
19116 && GET_MODE (op0) == GET_MODE (out)
19117 && (cf == -1 || ct == -1))
19119 /* If lea code below could be used, only optimize
19120 if it results in a 2 insn sequence. */
19122 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
19123 || diff == 3 || diff == 5 || diff == 9)
19124 || (compare_code == LT && ct == -1)
19125 || (compare_code == GE && cf == -1))
19128 * notl op1 (if necessary)
19136 code = reverse_condition (code);
19139 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
19141 out = expand_simple_binop (mode, IOR,
19143 out, 1, OPTAB_DIRECT);
19144 if (out != operands[0])
19145 emit_move_insn (operands[0], out);
19152 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
19153 || diff == 3 || diff == 5 || diff == 9)
19154 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
19156 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
19162 * lea cf(dest*(ct-cf)),dest
19166 * This also catches the degenerate setcc-only case.
19172 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
19175 /* On x86_64 the lea instruction operates on Pmode, so we need
19176 to get the arithmetic done in the proper mode to match. */
19178 tmp = copy_rtx (out);
19182 out1 = copy_rtx (out);
19183 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
19187 tmp = gen_rtx_PLUS (mode, tmp, out1);
19193 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
19196 if (!rtx_equal_p (tmp, out))
19199 out = force_operand (tmp, copy_rtx (out));
19201 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
19203 if (!rtx_equal_p (out, operands[0]))
19204 emit_move_insn (operands[0], copy_rtx (out));
19210 * General case:                  Jumpful:
19211 *   xorl dest,dest               cmpl op1, op2
19212 *   cmpl op1, op2                movl ct, dest
19213 *   setcc dest                   jcc 1f
19214 *   decl dest                    movl cf, dest
19215 *   andl (cf-ct),dest            1:
19218 *   Size 20.                     Size 14.
19220 * This is reasonably steep, but branch mispredict costs are
19221 * high on modern cpus, so consider failing only if optimizing for space.
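/* To see that the branchless sequence computes dest = cc ? ct : cf:
   setcc leaves 0 or 1, the decrement turns that into 0 or -1 (all
   ones), the and then leaves 0 or cf - ct, and the final addition of
   ct produces ct or cf respectively. */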
19225 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
19226 && BRANCH_COST (optimize_insn_for_speed_p (),
19231 enum machine_mode cmp_mode = GET_MODE (op0);
19236 if (SCALAR_FLOAT_MODE_P (cmp_mode))
19238 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
19240 /* We may be reversing an unordered compare to a normal compare,
19241 which is not valid in general (we may convert a non-trapping
19242 condition to a trapping one); however, on i386 we currently
19243 emit all comparisons unordered. */
19244 code = reverse_condition_maybe_unordered (code);
19248 code = reverse_condition (code);
19249 if (compare_code != UNKNOWN)
19250 compare_code = reverse_condition (compare_code);
19254 if (compare_code != UNKNOWN)
19256 /* notl op1 (if needed)
19261 For x < 0 (resp. x <= -1) there will be no notl,
19262 so if possible swap the constants to get rid of the complement.
19264 True/false will be -1/0 while code below (store flag
19265 followed by decrement) is 0/-1, so the constants need
19266 to be exchanged once more. */
19268 if (compare_code == GE || !cf)
19270 code = reverse_condition (code);
19275 HOST_WIDE_INT tmp = cf;
19280 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
19284 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
19286 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
19288 copy_rtx (out), 1, OPTAB_DIRECT);
19291 out = expand_simple_binop (mode, AND, copy_rtx (out),
19292 gen_int_mode (cf - ct, mode),
19293 copy_rtx (out), 1, OPTAB_DIRECT);
19295 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
19296 copy_rtx (out), 1, OPTAB_DIRECT);
19297 if (!rtx_equal_p (out, operands[0]))
19298 emit_move_insn (operands[0], copy_rtx (out));
19304 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
19306 /* Try a few things more with specific constants and a variable. */
19309 rtx var, orig_out, out, tmp;
19311 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19314 /* If one of the two operands is an interesting constant, load a
19315 constant with the above and mask it in with a logical operation. */
19317 if (CONST_INT_P (operands[2]))
19320 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
19321 operands[3] = constm1_rtx, op = and_optab;
19322 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
19323 operands[3] = const0_rtx, op = ior_optab;
19327 else if (CONST_INT_P (operands[3]))
19330 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
19331 operands[2] = constm1_rtx, op = and_optab;
19332 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
19333 operands[2] = const0_rtx, op = ior_optab;
19340 orig_out = operands[0];
19341 tmp = gen_reg_rtx (mode);
19344 /* Recurse to get the constant loaded. */
19345 if (ix86_expand_int_movcc (operands) == 0)
19348 /* Mask in the interesting variable. */
19349 out = expand_binop (mode, op, var, tmp, orig_out, 0,
19351 if (!rtx_equal_p (out, orig_out))
19352 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
19358 * For comparison with above,
19368 if (! nonimmediate_operand (operands[2], mode))
19369 operands[2] = force_reg (mode, operands[2]);
19370 if (! nonimmediate_operand (operands[3], mode))
19371 operands[3] = force_reg (mode, operands[3]);
19373 if (! register_operand (operands[2], VOIDmode)
19375 || ! register_operand (operands[3], VOIDmode)))
19376 operands[2] = force_reg (mode, operands[2]);
19379 && ! register_operand (operands[3], VOIDmode))
19380 operands[3] = force_reg (mode, operands[3]);
19382 emit_insn (compare_seq);
19383 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19384 gen_rtx_IF_THEN_ELSE (mode,
19385 compare_op, operands[2],
19390 /* Swap, force into registers, or otherwise massage the two operands
19391 to an sse comparison with a mask result. Thus we differ a bit from
19392 ix86_prepare_fp_compare_args which expects to produce a flags result.
19394 The DEST operand exists to help determine whether to commute commutative
19395 operators. The POP0/POP1 operands are updated in place. The new
19396 comparison code is returned, or UNKNOWN if not implementable. */
19398 static enum rtx_code
19399 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
19400 rtx *pop0, rtx *pop1)
19408 /* AVX supports all the needed comparisons. */
19411 /* We have no LTGT as an operator. We could implement it with
19412 NE & ORDERED, but this requires an extra temporary. It's
19413 not clear that it's worth it. */
19420 /* These are supported directly. */
19427 /* AVX has 3 operand comparisons, no need to swap anything. */
19430 /* For commutative operators, try to canonicalize the destination
19431 operand to be first in the comparison - this helps reload to
19432 avoid extra moves. */
19433 if (!dest || !rtx_equal_p (dest, *pop1))
19441 /* These are not supported directly before AVX, and furthermore
19442 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19443 comparison operands to transform into something that is
19448 code = swap_condition (code);
19452 gcc_unreachable ();
19458 /* Detect conditional moves that exactly match min/max operational
19459 semantics. Note that this is IEEE safe, as long as we don't
19460 interchange the operands.
19462 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19463 and TRUE if the operation is successful and instructions are emitted. */
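/* The operand order matters because of the SSE semantics: minss and
   maxss return their second operand when the comparison is unordered,
   so MIN (x, NaN) is NaN while MIN (NaN, x) is x. */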
19466 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
19467 rtx cmp_op1, rtx if_true, rtx if_false)
19469 enum machine_mode mode;
19475 else if (code == UNGE)
19478 if_true = if_false;
19484 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
19486 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
19491 mode = GET_MODE (dest);
19493 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19494 but MODE may be a vector mode and thus not appropriate. */
19495 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
19497 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
19500 if_true = force_reg (mode, if_true);
19501 v = gen_rtvec (2, if_true, if_false);
19502 tmp = gen_rtx_UNSPEC (mode, v, u);
19506 code = is_min ? SMIN : SMAX;
19507 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
19510 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
19514 /* Expand an sse vector comparison. Return the register with the result. */
19517 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
19518 rtx op_true, rtx op_false)
19520 enum machine_mode mode = GET_MODE (dest);
19521 enum machine_mode cmp_mode = GET_MODE (cmp_op0);
19524 cmp_op0 = force_reg (cmp_mode, cmp_op0);
19525 if (!nonimmediate_operand (cmp_op1, cmp_mode))
19526 cmp_op1 = force_reg (cmp_mode, cmp_op1);
19529 || reg_overlap_mentioned_p (dest, op_true)
19530 || reg_overlap_mentioned_p (dest, op_false))
19531 dest = gen_reg_rtx (mode);
19533 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
19534 if (cmp_mode != mode)
19536 x = force_reg (cmp_mode, x);
19537 convert_move (dest, x, false);
19540 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19545 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19546 operations. This is used for both scalar and vector conditional moves. */
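/* CMP is an element-wise mask of all ones where the condition holds,
   so the general case below computes
   dest = (cmp & op_true) | (~cmp & op_false);
   the special cases just avoid some of these operations when one arm
   is a known constant or a blend instruction is available. */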
19549 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
19551 enum machine_mode mode = GET_MODE (dest);
19554 if (vector_all_ones_operand (op_true, mode)
19555 && rtx_equal_p (op_false, CONST0_RTX (mode)))
19557 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
19559 else if (op_false == CONST0_RTX (mode))
19561 op_true = force_reg (mode, op_true);
19562 x = gen_rtx_AND (mode, cmp, op_true);
19563 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19565 else if (op_true == CONST0_RTX (mode))
19567 op_false = force_reg (mode, op_false);
19568 x = gen_rtx_NOT (mode, cmp);
19569 x = gen_rtx_AND (mode, x, op_false);
19570 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19572 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
19574 op_false = force_reg (mode, op_false);
19575 x = gen_rtx_IOR (mode, cmp, op_false);
19576 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19578 else if (TARGET_XOP)
19580 op_true = force_reg (mode, op_true);
19582 if (!nonimmediate_operand (op_false, mode))
19583 op_false = force_reg (mode, op_false);
19585 emit_insn (gen_rtx_SET (mode, dest,
19586 gen_rtx_IF_THEN_ELSE (mode, cmp,
19592 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
19594 if (!nonimmediate_operand (op_true, mode))
19595 op_true = force_reg (mode, op_true);
19597 op_false = force_reg (mode, op_false);
19603 gen = gen_sse4_1_blendvps;
19607 gen = gen_sse4_1_blendvpd;
19615 gen = gen_sse4_1_pblendvb;
19616 dest = gen_lowpart (V16QImode, dest);
19617 op_false = gen_lowpart (V16QImode, op_false);
19618 op_true = gen_lowpart (V16QImode, op_true);
19619 cmp = gen_lowpart (V16QImode, cmp);
19624 gen = gen_avx_blendvps256;
19628 gen = gen_avx_blendvpd256;
19636 gen = gen_avx2_pblendvb;
19637 dest = gen_lowpart (V32QImode, dest);
19638 op_false = gen_lowpart (V32QImode, op_false);
19639 op_true = gen_lowpart (V32QImode, op_true);
19640 cmp = gen_lowpart (V32QImode, cmp);
19648 emit_insn (gen (dest, op_false, op_true, cmp));
19651 op_true = force_reg (mode, op_true);
19653 t2 = gen_reg_rtx (mode);
19655 t3 = gen_reg_rtx (mode);
19659 x = gen_rtx_AND (mode, op_true, cmp);
19660 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
19662 x = gen_rtx_NOT (mode, cmp);
19663 x = gen_rtx_AND (mode, x, op_false);
19664 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
19666 x = gen_rtx_IOR (mode, t3, t2);
19667 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19672 /* Expand a floating-point conditional move. Return true if successful. */
19675 ix86_expand_fp_movcc (rtx operands[])
19677 enum machine_mode mode = GET_MODE (operands[0]);
19678 enum rtx_code code = GET_CODE (operands[1]);
19679 rtx tmp, compare_op;
19680 rtx op0 = XEXP (operands[1], 0);
19681 rtx op1 = XEXP (operands[1], 1);
19683 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
19685 enum machine_mode cmode;
19687 /* Since we have no cmove for sse registers, don't force bad register
19688 allocation just to gain access to it. Deny movcc when the
19689 comparison mode doesn't match the move mode. */
19690 cmode = GET_MODE (op0);
19691 if (cmode == VOIDmode)
19692 cmode = GET_MODE (op1);
19696 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
19697 if (code == UNKNOWN)
19700 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
19701 operands[2], operands[3]))
19704 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
19705 operands[2], operands[3]);
19706 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
19710 /* The floating point conditional move instructions don't directly
19711 support conditions resulting from a signed integer comparison. */
19713 compare_op = ix86_expand_compare (code, op0, op1);
19714 if (!fcmov_comparison_operator (compare_op, VOIDmode))
19716 tmp = gen_reg_rtx (QImode);
19717 ix86_expand_setcc (tmp, code, op0, op1);
19719 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
19722 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19723 gen_rtx_IF_THEN_ELSE (mode, compare_op,
19724 operands[2], operands[3])));
19729 /* Expand a floating-point vector conditional move; a vcond operation
19730 rather than a movcc operation. */
19733 ix86_expand_fp_vcond (rtx operands[])
19735 enum rtx_code code = GET_CODE (operands[3]);
19738 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
19739 &operands[4], &operands[5]);
19740 if (code == UNKNOWN)
19743 switch (GET_CODE (operands[3]))
19746 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
19747 operands[5], operands[0], operands[0]);
19748 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
19749 operands[5], operands[1], operands[2]);
19753 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
19754 operands[5], operands[0], operands[0]);
19755 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
19756 operands[5], operands[1], operands[2]);
19760 gcc_unreachable ();
19762 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
19764 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19768 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
19769 operands[5], operands[1], operands[2]))
19772 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
19773 operands[1], operands[2]);
19774 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19778 /* Expand a signed/unsigned integral vector conditional move. */
19781 ix86_expand_int_vcond (rtx operands[])
19783 enum machine_mode data_mode = GET_MODE (operands[0]);
19784 enum machine_mode mode = GET_MODE (operands[4]);
19785 enum rtx_code code = GET_CODE (operands[3]);
19786 bool negate = false;
19789 cop0 = operands[4];
19790 cop1 = operands[5];
19792 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
19793 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
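/* E.g. for a V4SImode element x = -5, the arithmetic shift right by 31
   replicates the sign bit and yields -1 (all ones), while the logical
   shift yields 1; non-negative elements yield 0 either way. */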
19794 if ((code == LT || code == GE)
19795 && data_mode == mode
19796 && cop1 == CONST0_RTX (mode)
19797 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
19798 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
19799 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
19800 && (GET_MODE_SIZE (data_mode) == 16
19801 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
19803 rtx negop = operands[2 - (code == LT)];
19804 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
19805 if (negop == CONST1_RTX (data_mode))
19807 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
19808 operands[0], 1, OPTAB_DIRECT);
19809 if (res != operands[0])
19810 emit_move_insn (operands[0], res);
19813 else if (GET_MODE_INNER (data_mode) != DImode
19814 && vector_all_ones_operand (negop, data_mode))
19816 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
19817 operands[0], 0, OPTAB_DIRECT);
19818 if (res != operands[0])
19819 emit_move_insn (operands[0], res);
19824 if (!nonimmediate_operand (cop1, mode))
19825 cop1 = force_reg (mode, cop1);
19826 if (!general_operand (operands[1], data_mode))
19827 operands[1] = force_reg (data_mode, operands[1]);
19828 if (!general_operand (operands[2], data_mode))
19829 operands[2] = force_reg (data_mode, operands[2]);
19831 /* XOP supports all of the comparisons on all 128-bit vector int types. */
19833 && (mode == V16QImode || mode == V8HImode
19834 || mode == V4SImode || mode == V2DImode))
19838 /* Canonicalize the comparison to EQ, GT, GTU. */
19849 code = reverse_condition (code);
19855 code = reverse_condition (code);
19861 code = swap_condition (code);
19862 x = cop0, cop0 = cop1, cop1 = x;
19866 gcc_unreachable ();
19869 /* Only SSE4.1/SSE4.2 support V2DImode. */
19870 if (mode == V2DImode)
19875 /* SSE4.1 supports EQ. */
19876 if (!TARGET_SSE4_1)
19882 /* SSE4.2 supports GT/GTU. */
19883 if (!TARGET_SSE4_2)
19888 gcc_unreachable ();
19892 /* Unsigned parallel compare is not supported by the hardware.
19893 Play some tricks to turn this into a signed comparison against zero. */
19897 cop0 = force_reg (mode, cop0);
19907 rtx (*gen_sub3) (rtx, rtx, rtx);
19911 case V8SImode: gen_sub3 = gen_subv8si3; break;
19912 case V4DImode: gen_sub3 = gen_subv4di3; break;
19913 case V4SImode: gen_sub3 = gen_subv4si3; break;
19914 case V2DImode: gen_sub3 = gen_subv2di3; break;
19916 gcc_unreachable ();
19918 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
19920 mask = ix86_build_signbit_mask (mode, true, false);
19921 t1 = gen_reg_rtx (mode);
19922 emit_insn (gen_sub3 (t1, cop0, mask));
19924 t2 = gen_reg_rtx (mode);
19925 emit_insn (gen_sub3 (t2, cop1, mask));
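/* For example, with 32-bit elements the unsigned comparison
   1 <u 4000000000 becomes 0x80000001 <s 0x6e6b2800, i.e.
   -2147483647 <s 1852516352: subtracting the sign-bit constant
   flips each element into signed order. */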
19937 /* Perform a parallel unsigned saturating subtraction. */
19938 x = gen_reg_rtx (mode);
19939 emit_insn (gen_rtx_SET (VOIDmode, x,
19940 gen_rtx_US_MINUS (mode, cop0, cop1)));
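/* The saturating difference is zero exactly when cop0 <=u cop1
   (e.g. 5 -us 9 == 0 while 9 -us 5 == 4), so the unsigned
   comparison can be finished as an equality test against zero. */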
19943 cop1 = CONST0_RTX (mode);
19949 gcc_unreachable ();
19954 /* Allow the comparison to be done in one mode, but the movcc to
19955 happen in another mode. */
19956 if (data_mode == mode)
19958 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19959 operands[1+negate], operands[2-negate]);
19963 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
19964 x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
19966 operands[1+negate], operands[2-negate]);
19967 x = gen_lowpart (data_mode, x);
19970 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19971 operands[2-negate]);
19975 /* Expand a variable vector permutation. */
19978 ix86_expand_vec_perm (rtx operands[])
19980 rtx target = operands[0];
19981 rtx op0 = operands[1];
19982 rtx op1 = operands[2];
19983 rtx mask = operands[3];
19984 rtx t1, t2, t3, t4, vt, vt2, vec[32];
19985 enum machine_mode mode = GET_MODE (op0);
19986 enum machine_mode maskmode = GET_MODE (mask);
19988 bool one_operand_shuffle = rtx_equal_p (op0, op1);
19990 /* Number of elements in the vector. */
19991 w = GET_MODE_NUNITS (mode);
19992 e = GET_MODE_UNIT_SIZE (mode);
19993 gcc_assert (w <= 32);
19997 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
19999      /* Unfortunately, the VPERMQ and VPERMPD instructions only support
20000	 a constant shuffle operand.  With a tiny bit of effort we can
20001	 use VPERMD instead.  A re-interpretation stall for V4DFmode is
20002	 unfortunate but there's no avoiding it.
20003	 Similarly, for V16HImode we don't have instructions for variable
20004	 shuffling, while for V32QImode we can use vpshufb; vpshufb;
20005	 vpermq; vpor after preparing suitable masks.  */
20007 if (mode == V16HImode)
20009 maskmode = mode = V32QImode;
20015 maskmode = mode = V8SImode;
20019 t1 = gen_reg_rtx (maskmode);
20021	  /* Replicate the low bits of the V4DImode mask into V8SImode:
	     mask = { A B C D }
20023	     t1 = { A A B B C C D D }.  */
20024 for (i = 0; i < w / 2; ++i)
20025 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
20026 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20027 vt = force_reg (maskmode, vt);
20028 mask = gen_lowpart (maskmode, mask);
20029 if (maskmode == V8SImode)
20030 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
20032 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
20034	  /* Multiply the shuffle indices by two.  */
20035 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
20038	  /* Add one to the odd shuffle indices:
20039 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
20040 for (i = 0; i < w / 2; ++i)
20042 vec[i * 2] = const0_rtx;
20043 vec[i * 2 + 1] = const1_rtx;
20045 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20046 vt = validize_mem (force_const_mem (maskmode, vt));
20047 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
20050 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
20051 operands[3] = mask = t1;
20052 target = gen_lowpart (mode, target);
20053 op0 = gen_lowpart (mode, op0);
20054 op1 = gen_lowpart (mode, op1);
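	  /* For example, a V4DImode mask { 1 3 0 2 } has now become the
	     V8SImode mask { 2 3 6 7 0 1 4 5 }, picking the two SImode
	     halves of each requested DImode element.  */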
20060 /* The VPERMD and VPERMPS instructions already properly ignore
20061 the high bits of the shuffle elements. No need for us to
20062 perform an AND ourselves. */
20063 if (one_operand_shuffle)
20064 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
20067 t1 = gen_reg_rtx (V8SImode);
20068 t2 = gen_reg_rtx (V8SImode);
20069 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
20070 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
20076 mask = gen_lowpart (V8SImode, mask);
20077 if (one_operand_shuffle)
20078 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
20081 t1 = gen_reg_rtx (V8SFmode);
20082 t2 = gen_reg_rtx (V8SFmode);
20083 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
20084 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
20090 /* By combining the two 128-bit input vectors into one 256-bit
20091 input vector, we can use VPERMD and VPERMPS for the full
20092 two-operand shuffle. */
20093 t1 = gen_reg_rtx (V8SImode);
20094 t2 = gen_reg_rtx (V8SImode);
20095 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
20096 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
20097 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
20098 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
20102 t1 = gen_reg_rtx (V8SFmode);
20103 t2 = gen_reg_rtx (V8SImode);
20104 mask = gen_lowpart (V4SImode, mask);
20105 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
20106 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
20107 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
20108 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
20112 t1 = gen_reg_rtx (V32QImode);
20113 t2 = gen_reg_rtx (V32QImode);
20114 t3 = gen_reg_rtx (V32QImode);
20115 vt2 = GEN_INT (128);
20116 for (i = 0; i < 32; i++)
20118 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
20119 vt = force_reg (V32QImode, vt);
20120 for (i = 0; i < 32; i++)
20121 vec[i] = i < 16 ? vt2 : const0_rtx;
20122 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
20123 vt2 = force_reg (V32QImode, vt2);
20124 /* From mask create two adjusted masks, which contain the same
20125 bits as mask in the low 7 bits of each vector element.
20126 The first mask will have the most significant bit clear
20127	 if it requests an element from the same 128-bit lane
20128	 and the MSB set if it requests an element from the other 128-bit lane.
20129 The second mask will have the opposite values of the MSB,
20130 and additionally will have its 128-bit lanes swapped.
20131 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
20132 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
20133 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
20134	 stands for the other 12 bytes.  */
20135      /* The bit telling whether an element is from the same lane or the
20136	 other lane is bit 4, so shift it up by 3 to the MSB position.  */
20137 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
20138 gen_lowpart (V4DImode, mask),
20140 /* Clear MSB bits from the mask just in case it had them set. */
20141 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
20142      /* After this t1 will have the MSB set for elements from the other lane.  */
20143 emit_insn (gen_xorv32qi3 (t1, t1, vt2));
20144 /* Clear bits other than MSB. */
20145 emit_insn (gen_andv32qi3 (t1, t1, vt));
20146 /* Or in the lower bits from mask into t3. */
20147 emit_insn (gen_iorv32qi3 (t3, t1, t2));
20148      /* And invert the MSB bits in t1, so the MSB is set for elements from
	 the same lane.  */
20150 emit_insn (gen_xorv32qi3 (t1, t1, vt));
20151 /* Swap 128-bit lanes in t3. */
20152 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20153 gen_lowpart (V4DImode, t3),
20154 const2_rtx, GEN_INT (3),
20155 const0_rtx, const1_rtx));
20156 /* And or in the lower bits from mask into t1. */
20157 emit_insn (gen_iorv32qi3 (t1, t1, t2));
20158 if (one_operand_shuffle)
20160	  /* Each of these shuffles will put 0s in places where an
20161	     element from the other 128-bit lane is needed, and otherwise
20162	     will shuffle in the requested value.  */
20163 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
20164 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
20165 /* For t3 the 128-bit lanes are swapped again. */
20166 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20167 gen_lowpart (V4DImode, t3),
20168 const2_rtx, GEN_INT (3),
20169 const0_rtx, const1_rtx));
20170	  /* ORing both together then yields the result.  */
20171 emit_insn (gen_iorv32qi3 (target, t1, t3));
20175 t4 = gen_reg_rtx (V32QImode);
20176	  /* Similar to the one_operand_shuffle code above, just
20177	     repeated for each of the two operands.  The merge_two:
20178	     code will merge the two results together.  */
20179 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
20180 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
20181 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
20182 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
20183 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
20184 gen_lowpart (V4DImode, t4),
20185 const2_rtx, GEN_INT (3),
20186 const0_rtx, const1_rtx));
20187 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20188 gen_lowpart (V4DImode, t3),
20189 const2_rtx, GEN_INT (3),
20190 const0_rtx, const1_rtx));
20191 emit_insn (gen_iorv32qi3 (t4, t2, t4));
20192 emit_insn (gen_iorv32qi3 (t3, t1, t3));
20198 gcc_assert (GET_MODE_SIZE (mode) <= 16);
20205 /* The XOP VPPERM insn supports three inputs. By ignoring the
20206 one_operand_shuffle special case, we avoid creating another
20207 set of constant vectors in memory. */
20208 one_operand_shuffle = false;
20210 /* mask = mask & {2*w-1, ...} */
20211 vt = GEN_INT (2*w - 1);
20215 /* mask = mask & {w-1, ...} */
20216 vt = GEN_INT (w - 1);
20219 for (i = 0; i < w; i++)
20221 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20222 mask = expand_simple_binop (maskmode, AND, mask, vt,
20223 NULL_RTX, 0, OPTAB_DIRECT);
20225 /* For non-QImode operations, convert the word permutation control
20226 into a byte permutation control. */
20227 if (mode != V16QImode)
20229 mask = expand_simple_binop (maskmode, ASHIFT, mask,
20230 GEN_INT (exact_log2 (e)),
20231 NULL_RTX, 0, OPTAB_DIRECT);
20233 /* Convert mask to vector of chars. */
20234 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
20236 /* Replicate each of the input bytes into byte positions:
20237 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20238 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20239 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20240 for (i = 0; i < 16; ++i)
20241 vec[i] = GEN_INT (i/e * e);
20242 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20243 vt = validize_mem (force_const_mem (V16QImode, vt));
20245 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
20247 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
20249      /* Convert it into the byte positions by doing
20250	 mask = mask + {0,1,..,e-1, 0,1,..,e-1, ...}  */
20251 for (i = 0; i < 16; ++i)
20252 vec[i] = GEN_INT (i % e);
20253 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20254 vt = validize_mem (force_const_mem (V16QImode, vt));
20255 emit_insn (gen_addv16qi3 (mask, mask, vt));
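      /* For example, for V4SImode (e == 4) a word index k has now been
	 expanded into the byte indices { 4k, 4k+1, 4k+2, 4k+3 }.  */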
20258 /* The actual shuffle operations all operate on V16QImode. */
20259 op0 = gen_lowpart (V16QImode, op0);
20260 op1 = gen_lowpart (V16QImode, op1);
20261 target = gen_lowpart (V16QImode, target);
20265 emit_insn (gen_xop_pperm (target, op0, op1, mask));
20267 else if (one_operand_shuffle)
20269 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
20276 /* Shuffle the two input vectors independently. */
20277 t1 = gen_reg_rtx (V16QImode);
20278 t2 = gen_reg_rtx (V16QImode);
20279 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
20280 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
20283 /* Then merge them together. The key is whether any given control
20284 element contained a bit set that indicates the second word. */
20285 mask = operands[3];
20287 if (maskmode == V2DImode && !TARGET_SSE4_1)
20289	  /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
20290	     more shuffle to convert the V2DI input mask into a V4SI
20291	     input mask.  At that point the masking that expand_int_vcond
20292	     performs will work as desired.  */
20293 rtx t3 = gen_reg_rtx (V4SImode);
20294 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
20295 const0_rtx, const0_rtx,
20296 const2_rtx, const2_rtx));
20298 maskmode = V4SImode;
20302 for (i = 0; i < w; i++)
20304 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20305 vt = force_reg (maskmode, vt);
20306 mask = expand_simple_binop (maskmode, AND, mask, vt,
20307 NULL_RTX, 0, OPTAB_DIRECT);
20309 xops[0] = gen_lowpart (mode, operands[0]);
20310 xops[1] = gen_lowpart (mode, t2);
20311 xops[2] = gen_lowpart (mode, t1);
20312 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
20315 ok = ix86_expand_int_vcond (xops);
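  /* The vcond just emitted computes, element-wise,
     operands[0] = ((mask & w) == w) ? t2 : t1,
     selecting the shuffle of op1 wherever the control word had the
     second-operand bit set.  */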
20320 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20321 true if we should do zero extension, else sign extension. HIGH_P is
20322 true if we want the N/2 high elements, else the low elements. */
20325 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
20327 enum machine_mode imode = GET_MODE (operands[1]);
20332 rtx (*unpack)(rtx, rtx);
20333 rtx (*extract)(rtx, rtx) = NULL;
20334 enum machine_mode halfmode = BLKmode;
20340 unpack = gen_avx2_zero_extendv16qiv16hi2;
20342 unpack = gen_avx2_sign_extendv16qiv16hi2;
20343 halfmode = V16QImode;
20345 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
20349 unpack = gen_avx2_zero_extendv8hiv8si2;
20351 unpack = gen_avx2_sign_extendv8hiv8si2;
20352 halfmode = V8HImode;
20354 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
20358 unpack = gen_avx2_zero_extendv4siv4di2;
20360 unpack = gen_avx2_sign_extendv4siv4di2;
20361 halfmode = V4SImode;
20363 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
20367 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
20369 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
20373 unpack = gen_sse4_1_zero_extendv4hiv4si2;
20375 unpack = gen_sse4_1_sign_extendv4hiv4si2;
20379 unpack = gen_sse4_1_zero_extendv2siv2di2;
20381 unpack = gen_sse4_1_sign_extendv2siv2di2;
20384 gcc_unreachable ();
20387 if (GET_MODE_SIZE (imode) == 32)
20389 tmp = gen_reg_rtx (halfmode);
20390 emit_insn (extract (tmp, operands[1]));
20394 /* Shift higher 8 bytes to lower 8 bytes. */
20395 tmp = gen_reg_rtx (imode);
20396 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
20397 gen_lowpart (V1TImode, operands[1]),
20403 emit_insn (unpack (operands[0], tmp));
20407 rtx (*unpack)(rtx, rtx, rtx);
20413 unpack = gen_vec_interleave_highv16qi;
20415 unpack = gen_vec_interleave_lowv16qi;
20419 unpack = gen_vec_interleave_highv8hi;
20421 unpack = gen_vec_interleave_lowv8hi;
20425 unpack = gen_vec_interleave_highv4si;
20427 unpack = gen_vec_interleave_lowv4si;
20430 gcc_unreachable ();
20433 dest = gen_lowpart (imode, operands[0]);
20436 tmp = force_reg (imode, CONST0_RTX (imode));
20438 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
20439 operands[1], pc_rtx, pc_rtx);
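  /* The GT comparison of 0 against operands[1] yields all-one bits
     exactly for the negative elements; interleaving the input with that
     mask supplies the correct sign extension in the high half of each
     widened element.  */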
20441 emit_insn (unpack (dest, operands[1], tmp));
20445 /* Expand a conditional increment or decrement using adc/sbb instructions.
20446 The default case using setcc followed by the conditional move can be
20447 done by generic code. */
20449 ix86_expand_int_addcc (rtx operands[])
20451 enum rtx_code code = GET_CODE (operands[1]);
20453 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
20455 rtx val = const0_rtx;
20456 bool fpcmp = false;
20457 enum machine_mode mode;
20458 rtx op0 = XEXP (operands[1], 0);
20459 rtx op1 = XEXP (operands[1], 1);
20461 if (operands[3] != const1_rtx
20462 && operands[3] != constm1_rtx)
20464 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20466 code = GET_CODE (compare_op);
20468 flags = XEXP (compare_op, 0);
20470 if (GET_MODE (flags) == CCFPmode
20471 || GET_MODE (flags) == CCFPUmode)
20474 code = ix86_fp_compare_code_to_integer (code);
20481 PUT_CODE (compare_op,
20482 reverse_condition_maybe_unordered
20483 (GET_CODE (compare_op)));
20485 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
20488 mode = GET_MODE (operands[0]);
20490 /* Construct either adc or sbb insn. */
20491 if ((code == LTU) == (operands[3] == constm1_rtx))
20496 insn = gen_subqi3_carry;
20499 insn = gen_subhi3_carry;
20502 insn = gen_subsi3_carry;
20505 insn = gen_subdi3_carry;
20508 gcc_unreachable ();
20516 insn = gen_addqi3_carry;
20519 insn = gen_addhi3_carry;
20522 insn = gen_addsi3_carry;
20525 insn = gen_adddi3_carry;
20528 gcc_unreachable ();
20531 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
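  /* For example, a conditional increment x += (a < b) is emitted as a
     compare followed by an adc with zero, letting the carry flag
     produced by the compare supply the increment.  */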
20537 /* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
20538    but works for floating point parameters and non-offsettable memories.
20539    For pushes, it returns just stack offsets; the values will be saved
20540    in the right order.  Maximally four parts are generated.  */
20543 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
20548     size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
20550 size = (GET_MODE_SIZE (mode) + 4) / 8;
20552 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
20553 gcc_assert (size >= 2 && size <= 4);
20555   /* Optimize constant pool references to immediates.  This is used by fp
20556      moves that force all constants to memory to allow combining.  */
20557 if (MEM_P (operand) && MEM_READONLY_P (operand))
20559 rtx tmp = maybe_get_pool_constant (operand);
20564 if (MEM_P (operand) && !offsettable_memref_p (operand))
20566       /* The only non-offsettable memories we handle are pushes.  */
20567 int ok = push_operand (operand, VOIDmode);
20571 operand = copy_rtx (operand);
20572 PUT_MODE (operand, Pmode);
20573 parts[0] = parts[1] = parts[2] = parts[3] = operand;
20577 if (GET_CODE (operand) == CONST_VECTOR)
20579 enum machine_mode imode = int_mode_for_mode (mode);
20580 /* Caution: if we looked through a constant pool memory above,
20581 the operand may actually have a different mode now. That's
20582 ok, since we want to pun this all the way back to an integer. */
20583 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
20584 gcc_assert (operand != NULL);
20590 if (mode == DImode)
20591 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
20596 if (REG_P (operand))
20598 gcc_assert (reload_completed);
20599 for (i = 0; i < size; i++)
20600 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
20602 else if (offsettable_memref_p (operand))
20604 operand = adjust_address (operand, SImode, 0);
20605 parts[0] = operand;
20606 for (i = 1; i < size; i++)
20607 parts[i] = adjust_address (operand, SImode, 4 * i);
20609 else if (GET_CODE (operand) == CONST_DOUBLE)
20614 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
20618 real_to_target (l, &r, mode);
20619 parts[3] = gen_int_mode (l[3], SImode);
20620 parts[2] = gen_int_mode (l[2], SImode);
20623 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
20624 parts[2] = gen_int_mode (l[2], SImode);
20627 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
20630 gcc_unreachable ();
20632 parts[1] = gen_int_mode (l[1], SImode);
20633 parts[0] = gen_int_mode (l[0], SImode);
20636 gcc_unreachable ();
20641 if (mode == TImode)
20642 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
20643 if (mode == XFmode || mode == TFmode)
20645       enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
20646 if (REG_P (operand))
20648 gcc_assert (reload_completed);
20649 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
20650 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
20652 else if (offsettable_memref_p (operand))
20654 operand = adjust_address (operand, DImode, 0);
20655 parts[0] = operand;
20656 parts[1] = adjust_address (operand, upper_mode, 8);
20658 else if (GET_CODE (operand) == CONST_DOUBLE)
20663 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
20664 real_to_target (l, &r, mode);
20666 /* Do not use shift by 32 to avoid warning on 32bit systems. */
20667 if (HOST_BITS_PER_WIDE_INT >= 64)
20670 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
20671 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
20674 parts[0] = immed_double_const (l[0], l[1], DImode);
20676 if (upper_mode == SImode)
20677 parts[1] = gen_int_mode (l[2], SImode);
20678 else if (HOST_BITS_PER_WIDE_INT >= 64)
20681 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
20682 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
20685 parts[1] = immed_double_const (l[2], l[3], DImode);
20688 gcc_unreachable ();
20695 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
20696    Return false when normal moves are needed; true when all required
20697    insns have been emitted.  Operands 2-5 contain the destination values
20698    in the correct order; operands 6-9 contain the source values.  */
20701 ix86_split_long_move (rtx operands[])
20706 int collisions = 0;
20707 enum machine_mode mode = GET_MODE (operands[0]);
20708 bool collisionparts[4];
20710   /* The DFmode expanders may ask us to move a double.
20711      For a 64-bit target this is a single move.  By hiding the fact
20712      here we simplify the i386.md splitters.  */
20713 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
20715       /* Optimize constant pool references to immediates.  This is used by
20716	 fp moves that force all constants to memory to allow combining.  */
20718 if (MEM_P (operands[1])
20719 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
20720 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
20721 operands[1] = get_pool_constant (XEXP (operands[1], 0));
20722 if (push_operand (operands[0], VOIDmode))
20724 operands[0] = copy_rtx (operands[0]);
20725 PUT_MODE (operands[0], Pmode);
20728 operands[0] = gen_lowpart (DImode, operands[0]);
20729 operands[1] = gen_lowpart (DImode, operands[1]);
20730 emit_move_insn (operands[0], operands[1]);
20734   /* The only non-offsettable memory we handle is a push.  */
20735 if (push_operand (operands[0], VOIDmode))
20738 gcc_assert (!MEM_P (operands[0])
20739 || offsettable_memref_p (operands[0]));
20741 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
20742 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
20744   /* When emitting a push, take care of source operands on the stack.  */
20745 if (push && MEM_P (operands[1])
20746 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
20748 rtx src_base = XEXP (part[1][nparts - 1], 0);
20750 /* Compensate for the stack decrement by 4. */
20751 if (!TARGET_64BIT && nparts == 3
20752 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
20753 src_base = plus_constant (src_base, 4);
20755       /* src_base refers to the stack pointer and is
20756	 automatically decreased by the emitted push.  */
20757 for (i = 0; i < nparts; i++)
20758 part[1][i] = change_address (part[1][i],
20759 GET_MODE (part[1][i]), src_base);
20762   /* We need to do the copy in the right order in case an address register
20763      of the source overlaps the destination.  */
20764 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
20768 for (i = 0; i < nparts; i++)
20771 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
20772 if (collisionparts[i])
20776 /* Collision in the middle part can be handled by reordering. */
20777 if (collisions == 1 && nparts == 3 && collisionparts [1])
20779 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20780 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20782 else if (collisions == 1
20784 && (collisionparts [1] || collisionparts [2]))
20786 if (collisionparts [1])
20788 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20789 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20793 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
20794 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
20798       /* If there are more collisions, we can't handle them by reordering.
20799	 Do an lea into the last part and use only one colliding move.  */
20800 else if (collisions > 1)
20806 base = part[0][nparts - 1];
20808	  /* Handle the case when the last part isn't valid for lea.
20809	     This happens in 64-bit mode when storing the 12-byte XFmode.  */
20810 if (GET_MODE (base) != Pmode)
20811 base = gen_rtx_REG (Pmode, REGNO (base));
20813 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
20814 part[1][0] = replace_equiv_address (part[1][0], base);
20815 for (i = 1; i < nparts; i++)
20817 tmp = plus_constant (base, UNITS_PER_WORD * i);
20818 part[1][i] = replace_equiv_address (part[1][i], tmp);
20829 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
20830 emit_insn (gen_addsi3 (stack_pointer_rtx,
20831 stack_pointer_rtx, GEN_INT (-4)));
20832 emit_move_insn (part[0][2], part[1][2]);
20834 else if (nparts == 4)
20836 emit_move_insn (part[0][3], part[1][3]);
20837 emit_move_insn (part[0][2], part[1][2]);
20842       /* In 64-bit mode we don't have a 32-bit push available.  In case this
20843	 is a register, it is OK; we will just use the larger counterpart.  We
20844	 also retype memory; these come from an attempt to avoid the REX prefix
20845	 on moving the second half of a TFmode value.  */
20846 if (GET_MODE (part[1][1]) == SImode)
20848 switch (GET_CODE (part[1][1]))
20851 part[1][1] = adjust_address (part[1][1], DImode, 0);
20855 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
20859 gcc_unreachable ();
20862 if (GET_MODE (part[1][0]) == SImode)
20863 part[1][0] = part[1][1];
20866 emit_move_insn (part[0][1], part[1][1]);
20867 emit_move_insn (part[0][0], part[1][0]);
20871   /* Choose the correct order so as not to overwrite the source before it is copied.  */
20872 if ((REG_P (part[0][0])
20873 && REG_P (part[1][1])
20874 && (REGNO (part[0][0]) == REGNO (part[1][1])
20876 && REGNO (part[0][0]) == REGNO (part[1][2]))
20878 && REGNO (part[0][0]) == REGNO (part[1][3]))))
20880 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
20882 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
20884 operands[2 + i] = part[0][j];
20885 operands[6 + i] = part[1][j];
20890 for (i = 0; i < nparts; i++)
20892 operands[2 + i] = part[0][i];
20893 operands[6 + i] = part[1][i];
20897 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
20898 if (optimize_insn_for_size_p ())
20900 for (j = 0; j < nparts - 1; j++)
20901 if (CONST_INT_P (operands[6 + j])
20902 && operands[6 + j] != const0_rtx
20903 && REG_P (operands[2 + j]))
20904 for (i = j; i < nparts - 1; i++)
20905 if (CONST_INT_P (operands[7 + i])
20906 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
20907 operands[7 + i] = operands[2 + j];
20910 for (i = 0; i < nparts; i++)
20911 emit_move_insn (operands[2 + i], operands[6 + i]);
20916 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
20917 left shift by a constant, either using a single shift or
20918 a sequence of add instructions. */
20921 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
20923 rtx (*insn)(rtx, rtx, rtx);
20926 || (count * ix86_cost->add <= ix86_cost->shift_const
20927 && !optimize_insn_for_size_p ()))
20929 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
20930 while (count-- > 0)
20931 emit_insn (insn (operand, operand, operand));
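      /* Each self-addition doubles OPERAND, so COUNT additions implement
	 the left shift by COUNT.  */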
20935 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20936 emit_insn (insn (operand, operand, GEN_INT (count)));
20941 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
20943 rtx (*gen_ashl3)(rtx, rtx, rtx);
20944 rtx (*gen_shld)(rtx, rtx, rtx);
20945 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20947 rtx low[2], high[2];
20950 if (CONST_INT_P (operands[2]))
20952 split_double_mode (mode, operands, 2, low, high);
20953 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20955 if (count >= half_width)
20957 emit_move_insn (high[0], low[1]);
20958 emit_move_insn (low[0], const0_rtx);
20960 if (count > half_width)
20961 ix86_expand_ashl_const (high[0], count - half_width, mode);
20965 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20967 if (!rtx_equal_p (operands[0], operands[1]))
20968 emit_move_insn (operands[0], operands[1]);
20970 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
20971 ix86_expand_ashl_const (low[0], count, mode);
20976 split_double_mode (mode, operands, 1, low, high);
20978 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20980 if (operands[1] == const1_rtx)
20982       /* Assuming we've chosen QImode-capable registers, 1 << N
20983	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
20984 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
20986 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
20988 ix86_expand_clear (low[0]);
20989 ix86_expand_clear (high[0]);
20990 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
20992 d = gen_lowpart (QImode, low[0]);
20993 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20994 s = gen_rtx_EQ (QImode, flags, const0_rtx);
20995 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20997 d = gen_lowpart (QImode, high[0]);
20998 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20999 s = gen_rtx_NE (QImode, flags, const0_rtx);
21000 emit_insn (gen_rtx_SET (VOIDmode, d, s));
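	  /* At this point (low, high) is (1, 0) if bit log2(half_width) of
	     the shift count is clear and (0, 1) if it is set; the later
	     shifts by the full count (which the hardware masks modulo
	     half_width) then place the single one bit correctly.  */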
21003 /* Otherwise, we can get the same results by manually performing
21004 a bit extract operation on bit 5/6, and then performing the two
21005 shifts. The two methods of getting 0/1 into low/high are exactly
21006 the same size. Avoiding the shift in the bit extract case helps
21007 pentium4 a bit; no one else seems to care much either way. */
21010 enum machine_mode half_mode;
21011 rtx (*gen_lshr3)(rtx, rtx, rtx);
21012 rtx (*gen_and3)(rtx, rtx, rtx);
21013 rtx (*gen_xor3)(rtx, rtx, rtx);
21014 HOST_WIDE_INT bits;
21017 if (mode == DImode)
21019 half_mode = SImode;
21020 gen_lshr3 = gen_lshrsi3;
21021 gen_and3 = gen_andsi3;
21022 gen_xor3 = gen_xorsi3;
21027 half_mode = DImode;
21028 gen_lshr3 = gen_lshrdi3;
21029 gen_and3 = gen_anddi3;
21030 gen_xor3 = gen_xordi3;
21034 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
21035 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
21037 x = gen_lowpart (half_mode, operands[2]);
21038 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
21040 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
21041 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
21042 emit_move_insn (low[0], high[0]);
21043 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
21046 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
21047 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
21051 if (operands[1] == constm1_rtx)
21053 /* For -1 << N, we can avoid the shld instruction, because we
21054 know that we're shifting 0...31/63 ones into a -1. */
21055 emit_move_insn (low[0], constm1_rtx);
21056 if (optimize_insn_for_size_p ())
21057 emit_move_insn (high[0], low[0]);
21059 emit_move_insn (high[0], constm1_rtx);
21063 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
21065 if (!rtx_equal_p (operands[0], operands[1]))
21066 emit_move_insn (operands[0], operands[1]);
21068 split_double_mode (mode, operands, 1, low, high);
21069 emit_insn (gen_shld (high[0], low[0], operands[2]));
21072 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
21074 if (TARGET_CMOVE && scratch)
21076 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21077 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21079 ix86_expand_clear (scratch);
21080 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
21084 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
21085 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
21087 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
21092 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
21094 rtx (*gen_ashr3)(rtx, rtx, rtx)
21095 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
21096 rtx (*gen_shrd)(rtx, rtx, rtx);
21097 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21099 rtx low[2], high[2];
21102 if (CONST_INT_P (operands[2]))
21104 split_double_mode (mode, operands, 2, low, high);
21105 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21107 if (count == GET_MODE_BITSIZE (mode) - 1)
21109 emit_move_insn (high[0], high[1]);
21110 emit_insn (gen_ashr3 (high[0], high[0],
21111 GEN_INT (half_width - 1)));
21112 emit_move_insn (low[0], high[0]);
21115 else if (count >= half_width)
21117 emit_move_insn (low[0], high[1]);
21118 emit_move_insn (high[0], low[0]);
21119 emit_insn (gen_ashr3 (high[0], high[0],
21120 GEN_INT (half_width - 1)));
21122 if (count > half_width)
21123 emit_insn (gen_ashr3 (low[0], low[0],
21124 GEN_INT (count - half_width)));
21128 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21130 if (!rtx_equal_p (operands[0], operands[1]))
21131 emit_move_insn (operands[0], operands[1]);
21133 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
21134 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
21139 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21141 if (!rtx_equal_p (operands[0], operands[1]))
21142 emit_move_insn (operands[0], operands[1]);
21144 split_double_mode (mode, operands, 1, low, high);
21146 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21147 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
21149 if (TARGET_CMOVE && scratch)
21151 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21152 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21154 emit_move_insn (scratch, high[0]);
21155 emit_insn (gen_ashr3 (scratch, scratch,
21156 GEN_INT (half_width - 1)));
21157 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21162 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
21163 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
21165 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
21171 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
21173 rtx (*gen_lshr3)(rtx, rtx, rtx)
21174 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
21175 rtx (*gen_shrd)(rtx, rtx, rtx);
21176 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21178 rtx low[2], high[2];
21181 if (CONST_INT_P (operands[2]))
21183 split_double_mode (mode, operands, 2, low, high);
21184 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21186 if (count >= half_width)
21188 emit_move_insn (low[0], high[1]);
21189 ix86_expand_clear (high[0]);
21191 if (count > half_width)
21192 emit_insn (gen_lshr3 (low[0], low[0],
21193 GEN_INT (count - half_width)));
21197 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21199 if (!rtx_equal_p (operands[0], operands[1]))
21200 emit_move_insn (operands[0], operands[1]);
21202 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
21203 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
21208 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21210 if (!rtx_equal_p (operands[0], operands[1]))
21211 emit_move_insn (operands[0], operands[1]);
21213 split_double_mode (mode, operands, 1, low, high);
21215 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21216 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
21218 if (TARGET_CMOVE && scratch)
21220 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21221 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21223 ix86_expand_clear (scratch);
21224 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21229 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
21230 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
21232 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
21237 /* Predict the just-emitted jump instruction to be taken with probability PROB.  */
21239 predict_jump (int prob)
21241 rtx insn = get_last_insn ();
21242 gcc_assert (JUMP_P (insn));
21243 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
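  /* PROB is measured against REG_BR_PROB_BASE (10000), so e.g.
     REG_BR_PROB_BASE * 90 / 100 marks the jump as 90% taken.  */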
21246 /* Helper function for the string operations below.  Test VARIABLE for
21247    whether it is aligned to VALUE bytes.  If so, jump to the label.  */
21249 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
21251 rtx label = gen_label_rtx ();
21252 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
21253 if (GET_MODE (variable) == DImode)
21254 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
21256 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
21257 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
21260 predict_jump (REG_BR_PROB_BASE * 50 / 100);
21262 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21266 /* Adjust COUNTREG by VALUE.  */
21268 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
21270 rtx (*gen_add)(rtx, rtx, rtx)
21271 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
21273 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
21276 /* Zero-extend the possibly-SImode EXP to a Pmode register.  */
21278 ix86_zero_extend_to_Pmode (rtx exp)
21281 if (GET_MODE (exp) == VOIDmode)
21282 return force_reg (Pmode, exp);
21283 if (GET_MODE (exp) == Pmode)
21284 return copy_to_mode_reg (Pmode, exp);
21285 r = gen_reg_rtx (Pmode);
21286 emit_insn (gen_zero_extendsidi2 (r, exp));
21290 /* Divide COUNTREG by SCALE. */
21292 scale_counter (rtx countreg, int scale)
21298 if (CONST_INT_P (countreg))
21299 return GEN_INT (INTVAL (countreg) / scale);
21300 gcc_assert (REG_P (countreg));
21302 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
21303 GEN_INT (exact_log2 (scale)),
21304 NULL, 1, OPTAB_DIRECT);
21308 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21309 DImode for constant loop counts. */
21311 static enum machine_mode
21312 counter_mode (rtx count_exp)
21314 if (GET_MODE (count_exp) != VOIDmode)
21315 return GET_MODE (count_exp);
21316 if (!CONST_INT_P (count_exp))
21318 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
21323 /* When SRCPTR is non-NULL, output a simple loop to move memory
21324    from SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
21325    the overall size is COUNT, specified in bytes.  When SRCPTR is NULL, output
21326    the equivalent loop to set memory to VALUE (supposed to be in MODE).
21328    The size is rounded down to a whole number of chunks moved at once.
21329    SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */
21333 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
21334 rtx destptr, rtx srcptr, rtx value,
21335 rtx count, enum machine_mode mode, int unroll,
21338 rtx out_label, top_label, iter, tmp;
21339 enum machine_mode iter_mode = counter_mode (count);
21340 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
21341 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
21347 top_label = gen_label_rtx ();
21348 out_label = gen_label_rtx ();
21349 iter = gen_reg_rtx (iter_mode);
21351 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
21352 NULL, 1, OPTAB_DIRECT);
21353 /* Those two should combine. */
21354 if (piece_size == const1_rtx)
21356 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
21358 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21360 emit_move_insn (iter, const0_rtx);
21362 emit_label (top_label);
21364 tmp = convert_modes (Pmode, iter_mode, iter, true);
21365 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
21366 destmem = change_address (destmem, mode, x_addr);
21370 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
21371 srcmem = change_address (srcmem, mode, y_addr);
21373   /* When unrolling for chips that reorder memory reads and writes,
21374      we can save registers by using a single temporary.
21375      Also, using 4 temporaries is overkill in 32-bit mode.  */
21376 if (!TARGET_64BIT && 0)
21378 for (i = 0; i < unroll; i++)
21383 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21385 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21387 emit_move_insn (destmem, srcmem);
21393 gcc_assert (unroll <= 4);
21394 for (i = 0; i < unroll; i++)
21396 tmpreg[i] = gen_reg_rtx (mode);
21400 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21402 emit_move_insn (tmpreg[i], srcmem);
21404 for (i = 0; i < unroll; i++)
21409 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21411 emit_move_insn (destmem, tmpreg[i]);
21416 for (i = 0; i < unroll; i++)
21420 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21421 emit_move_insn (destmem, value);
21424 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
21425 true, OPTAB_LIB_WIDEN);
21427 emit_move_insn (iter, tmp);
21429 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
21431 if (expected_size != -1)
21433 expected_size /= GET_MODE_SIZE (mode) * unroll;
21434 if (expected_size == 0)
21436 else if (expected_size > REG_BR_PROB_BASE)
21437 predict_jump (REG_BR_PROB_BASE - 1);
21439 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
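	  /* The loop is expected to iterate about EXPECTED_SIZE times, so
	     the backward branch is taken with probability roughly
	     1 - 1/EXPECTED_SIZE; the expression above rounds that to the
	     nearest REG_BR_PROB_BASE fraction.  */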
21442 predict_jump (REG_BR_PROB_BASE * 80 / 100);
21443 iter = ix86_zero_extend_to_Pmode (iter);
21444 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
21445 true, OPTAB_LIB_WIDEN);
21446 if (tmp != destptr)
21447 emit_move_insn (destptr, tmp);
21450 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
21451 true, OPTAB_LIB_WIDEN);
21453 emit_move_insn (srcptr, tmp);
21455 emit_label (out_label);
21458 /* Output "rep; mov" instruction.
21459 Arguments have same meaning as for previous function */
21461 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
21462 rtx destptr, rtx srcptr,
21464 enum machine_mode mode)
21469 HOST_WIDE_INT rounded_count;
21471   /* If the size is known, it is shorter to use rep movs.  */
21472 if (mode == QImode && CONST_INT_P (count)
21473 && !(INTVAL (count) & 3))
21476 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21477 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
21478 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
21479 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
21480 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
21481 if (mode != QImode)
21483 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21484 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21485 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21486 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
21487 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21488 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
21492 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
21493 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
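      /* DESTEXP and SRCEXP give the final values of the pointer registers,
	 i.e. the original pointers plus the byte count (already scaled by
	 the chunk size in the non-QImode case); the rep_mov pattern uses
	 them to describe its side effects.  */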
21495 if (CONST_INT_P (count))
21497 rounded_count = (INTVAL (count)
21498 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21499 destmem = shallow_copy_rtx (destmem);
21500 srcmem = shallow_copy_rtx (srcmem);
21501 set_mem_size (destmem, rounded_count);
21502 set_mem_size (srcmem, rounded_count);
21506 if (MEM_SIZE_KNOWN_P (destmem))
21507 clear_mem_size (destmem);
21508 if (MEM_SIZE_KNOWN_P (srcmem))
21509 clear_mem_size (srcmem);
21511 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
21515 /* Output "rep; stos" instruction.
21516 Arguments have same meaning as for previous function */
21518 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
21519 rtx count, enum machine_mode mode,
21524 HOST_WIDE_INT rounded_count;
21526 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21527 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
21528 value = force_reg (mode, gen_lowpart (mode, value));
21529 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
21530 if (mode != QImode)
21532 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21533 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21534 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21537 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
21538 if (orig_value == const0_rtx && CONST_INT_P (count))
21540 rounded_count = (INTVAL (count)
21541 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21542 destmem = shallow_copy_rtx (destmem);
21543 set_mem_size (destmem, rounded_count);
21545 else if (MEM_SIZE_KNOWN_P (destmem))
21546 clear_mem_size (destmem);
21547 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
21551 emit_strmov (rtx destmem, rtx srcmem,
21552 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
21554 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
21555 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
21556 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21559 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
21561 expand_movmem_epilogue (rtx destmem, rtx srcmem,
21562 rtx destptr, rtx srcptr, rtx count, int max_size)
21565 if (CONST_INT_P (count))
21567 HOST_WIDE_INT countval = INTVAL (count);
21570 if ((countval & 0x10) && max_size > 16)
21574 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
21575 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
21578 gcc_unreachable ();
21581 if ((countval & 0x08) && max_size > 8)
21584 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
21587 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
21588 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
21592 if ((countval & 0x04) && max_size > 4)
21594 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
21597 if ((countval & 0x02) && max_size > 2)
21599 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
21602 if ((countval & 0x01) && max_size > 1)
21604 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
21611 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
21612 count, 1, OPTAB_DIRECT);
21613 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
21614 count, QImode, 1, 4);
21618   /* When there are single-instruction stringops, we can cheaply advance
21619      the dest and src pointers.  Otherwise we save code size by maintaining
21620      an offset (zero is readily available from the preceding rep operation)
	 and using x86 addressing modes.  */
21622 if (TARGET_SINGLE_STRINGOP)
21626 rtx label = ix86_expand_aligntest (count, 4, true);
21627 src = change_address (srcmem, SImode, srcptr);
21628 dest = change_address (destmem, SImode, destptr);
21629 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21630 emit_label (label);
21631 LABEL_NUSES (label) = 1;
21635 rtx label = ix86_expand_aligntest (count, 2, true);
21636 src = change_address (srcmem, HImode, srcptr);
21637 dest = change_address (destmem, HImode, destptr);
21638 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21639 emit_label (label);
21640 LABEL_NUSES (label) = 1;
21644 rtx label = ix86_expand_aligntest (count, 1, true);
21645 src = change_address (srcmem, QImode, srcptr);
21646 dest = change_address (destmem, QImode, destptr);
21647 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21648 emit_label (label);
21649 LABEL_NUSES (label) = 1;
21654 rtx offset = force_reg (Pmode, const0_rtx);
21659 rtx label = ix86_expand_aligntest (count, 4, true);
21660 src = change_address (srcmem, SImode, srcptr);
21661 dest = change_address (destmem, SImode, destptr);
21662 emit_move_insn (dest, src);
21663 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
21664 true, OPTAB_LIB_WIDEN);
21666 emit_move_insn (offset, tmp);
21667 emit_label (label);
21668 LABEL_NUSES (label) = 1;
21672 rtx label = ix86_expand_aligntest (count, 2, true);
21673 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21674 src = change_address (srcmem, HImode, tmp);
21675 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21676 dest = change_address (destmem, HImode, tmp);
21677 emit_move_insn (dest, src);
21678 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
21679 true, OPTAB_LIB_WIDEN);
21681 emit_move_insn (offset, tmp);
21682 emit_label (label);
21683 LABEL_NUSES (label) = 1;
21687 rtx label = ix86_expand_aligntest (count, 1, true);
21688 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21689 src = change_address (srcmem, QImode, tmp);
21690 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21691 dest = change_address (destmem, QImode, tmp);
21692 emit_move_insn (dest, src);
21693 emit_label (label);
21694 LABEL_NUSES (label) = 1;
21699 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
21701 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
21702 rtx count, int max_size)
21705 expand_simple_binop (counter_mode (count), AND, count,
21706 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
21707 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
21708 gen_lowpart (QImode, value), count, QImode,
21712 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
21714 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
21718 if (CONST_INT_P (count))
21720 HOST_WIDE_INT countval = INTVAL (count);
21723 if ((countval & 0x10) && max_size > 16)
21727 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
21728 emit_insn (gen_strset (destptr, dest, value));
21729 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
21730 emit_insn (gen_strset (destptr, dest, value));
21733 gcc_unreachable ();
21736 if ((countval & 0x08) && max_size > 8)
21740 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
21741 emit_insn (gen_strset (destptr, dest, value));
21745 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
21746 emit_insn (gen_strset (destptr, dest, value));
21747 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
21748 emit_insn (gen_strset (destptr, dest, value));
21752 if ((countval & 0x04) && max_size > 4)
21754 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
21755 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
21758 if ((countval & 0x02) && max_size > 2)
21760 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
21761 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
21764 if ((countval & 0x01) && max_size > 1)
21766 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
21767 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
21774 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
21779 rtx label = ix86_expand_aligntest (count, 16, true);
21782 dest = change_address (destmem, DImode, destptr);
21783 emit_insn (gen_strset (destptr, dest, value));
21784 emit_insn (gen_strset (destptr, dest, value));
21788 dest = change_address (destmem, SImode, destptr);
21789 emit_insn (gen_strset (destptr, dest, value));
21790 emit_insn (gen_strset (destptr, dest, value));
21791 emit_insn (gen_strset (destptr, dest, value));
21792 emit_insn (gen_strset (destptr, dest, value));
21794 emit_label (label);
21795 LABEL_NUSES (label) = 1;
21799 rtx label = ix86_expand_aligntest (count, 8, true);
21802 dest = change_address (destmem, DImode, destptr);
21803 emit_insn (gen_strset (destptr, dest, value));
21807 dest = change_address (destmem, SImode, destptr);
21808 emit_insn (gen_strset (destptr, dest, value));
21809 emit_insn (gen_strset (destptr, dest, value));
21811 emit_label (label);
21812 LABEL_NUSES (label) = 1;
21816 rtx label = ix86_expand_aligntest (count, 4, true);
21817 dest = change_address (destmem, SImode, destptr);
21818 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
21819 emit_label (label);
21820 LABEL_NUSES (label) = 1;
21824 rtx label = ix86_expand_aligntest (count, 2, true);
21825 dest = change_address (destmem, HImode, destptr);
21826 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
21827 emit_label (label);
21828 LABEL_NUSES (label) = 1;
21832 rtx label = ix86_expand_aligntest (count, 1, true);
21833 dest = change_address (destmem, QImode, destptr);
21834 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
21835 emit_label (label);
21836 LABEL_NUSES (label) = 1;
21840 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
21841    to DESIRED_ALIGNMENT.  */
21843 expand_movmem_prologue (rtx destmem, rtx srcmem,
21844 rtx destptr, rtx srcptr, rtx count,
21845 int align, int desired_alignment)
21847 if (align <= 1 && desired_alignment > 1)
21849 rtx label = ix86_expand_aligntest (destptr, 1, false);
21850 srcmem = change_address (srcmem, QImode, srcptr);
21851 destmem = change_address (destmem, QImode, destptr);
21852 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21853 ix86_adjust_counter (count, 1);
21854 emit_label (label);
21855 LABEL_NUSES (label) = 1;
21857 if (align <= 2 && desired_alignment > 2)
21859 rtx label = ix86_expand_aligntest (destptr, 2, false);
21860 srcmem = change_address (srcmem, HImode, srcptr);
21861 destmem = change_address (destmem, HImode, destptr);
21862 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21863 ix86_adjust_counter (count, 2);
21864 emit_label (label);
21865 LABEL_NUSES (label) = 1;
21867 if (align <= 4 && desired_alignment > 4)
21869 rtx label = ix86_expand_aligntest (destptr, 4, false);
21870 srcmem = change_address (srcmem, SImode, srcptr);
21871 destmem = change_address (destmem, SImode, destptr);
21872 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21873 ix86_adjust_counter (count, 4);
21874 emit_label (label);
21875 LABEL_NUSES (label) = 1;
21877 gcc_assert (desired_alignment <= 8);
21880 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
21881    ALIGN_BYTES is how many bytes need to be copied.  */
21883 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
21884 int desired_align, int align_bytes)
21887 rtx orig_dst = dst;
21888 rtx orig_src = src;
21890 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
21891 if (src_align_bytes >= 0)
21892 src_align_bytes = desired_align - src_align_bytes;
21893 if (align_bytes & 1)
21895 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21896 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
21898 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21900 if (align_bytes & 2)
21902 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21903 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
21904 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21905 set_mem_align (dst, 2 * BITS_PER_UNIT);
21906 if (src_align_bytes >= 0
21907 && (src_align_bytes & 1) == (align_bytes & 1)
21908 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
21909 set_mem_align (src, 2 * BITS_PER_UNIT);
21911 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21913 if (align_bytes & 4)
21915 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21916 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
21917 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21918 set_mem_align (dst, 4 * BITS_PER_UNIT);
21919 if (src_align_bytes >= 0)
21921 unsigned int src_align = 0;
21922 if ((src_align_bytes & 3) == (align_bytes & 3))
21924 else if ((src_align_bytes & 1) == (align_bytes & 1))
21926 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21927 set_mem_align (src, src_align * BITS_PER_UNIT);
21930 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21932 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21933 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
21934 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21935 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21936 if (src_align_bytes >= 0)
21938 unsigned int src_align = 0;
21939 if ((src_align_bytes & 7) == (align_bytes & 7))
21941 else if ((src_align_bytes & 3) == (align_bytes & 3))
21943 else if ((src_align_bytes & 1) == (align_bytes & 1))
21945 if (src_align > (unsigned int) desired_align)
21946 src_align = desired_align;
21947 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21948 set_mem_align (src, src_align * BITS_PER_UNIT);
21950 if (MEM_SIZE_KNOWN_P (orig_dst))
21951 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21952 if (MEM_SIZE_KNOWN_P (orig_src))
21953 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
21958 /* Store enough into DEST to align DEST, known to be aligned by ALIGN,
21959    to DESIRED_ALIGNMENT.  */
21961 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
21962 int align, int desired_alignment)
21964 if (align <= 1 && desired_alignment > 1)
21966 rtx label = ix86_expand_aligntest (destptr, 1, false);
21967 destmem = change_address (destmem, QImode, destptr);
21968 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
21969 ix86_adjust_counter (count, 1);
21970 emit_label (label);
21971 LABEL_NUSES (label) = 1;
21973 if (align <= 2 && desired_alignment > 2)
21975 rtx label = ix86_expand_aligntest (destptr, 2, false);
21976 destmem = change_address (destmem, HImode, destptr);
21977 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
21978 ix86_adjust_counter (count, 2);
21979 emit_label (label);
21980 LABEL_NUSES (label) = 1;
21982 if (align <= 4 && desired_alignment > 4)
21984 rtx label = ix86_expand_aligntest (destptr, 4, false);
21985 destmem = change_address (destmem, SImode, destptr);
21986 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
21987 ix86_adjust_counter (count, 4);
21988 emit_label (label);
21989 LABEL_NUSES (label) = 1;
21991 gcc_assert (desired_alignment <= 8);
21994 /* Store enough into DST to align DST, known to be aligned by ALIGN,
21995    to DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
21997 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
21998 int desired_align, int align_bytes)
22001 rtx orig_dst = dst;
22002 if (align_bytes & 1)
22004 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
22006 emit_insn (gen_strset (destreg, dst,
22007 gen_lowpart (QImode, value)));
22009 if (align_bytes & 2)
22011 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
22012 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
22013 set_mem_align (dst, 2 * BITS_PER_UNIT);
22015 emit_insn (gen_strset (destreg, dst,
22016 gen_lowpart (HImode, value)));
22018 if (align_bytes & 4)
22020 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
22021 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
22022 set_mem_align (dst, 4 * BITS_PER_UNIT);
22024 emit_insn (gen_strset (destreg, dst,
22025 gen_lowpart (SImode, value)));
22027 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
22028 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
22029 set_mem_align (dst, desired_align * BITS_PER_UNIT);
22030 if (MEM_SIZE_KNOWN_P (orig_dst))
22031 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
22035 /* Given COUNT and EXPECTED_SIZE, decide on code generation for the string operation.  */
22036 static enum stringop_alg
22037 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
22038 int *dynamic_check)
22040 const struct stringop_algs * algs;
22041 bool optimize_for_speed;
22042 /* Algorithms using the rep prefix want at least edi and ecx;
22043 additionally, memset wants eax and memcpy wants esi. Don't
22044 consider such algorithms if the user has appropriated those
22045 registers for their own purposes. */
22046 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
22048 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
22050 #define ALG_USABLE_P(alg) (rep_prefix_usable \
22051 || (alg != rep_prefix_1_byte \
22052 && alg != rep_prefix_4_byte \
22053 && alg != rep_prefix_8_byte))
22054 const struct processor_costs *cost;
22056 /* Even if the string operation call is cold, we still might spend a lot
22057 of time processing large blocks. */
22058 if (optimize_function_for_size_p (cfun)
22059 || (optimize_insn_for_size_p ()
22060 && expected_size != -1 && expected_size < 256))
22061 optimize_for_speed = false;
22063 optimize_for_speed = true;
22065 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
22067 *dynamic_check = -1;
22069 algs = &cost->memset[TARGET_64BIT != 0];
22071 algs = &cost->memcpy[TARGET_64BIT != 0];
22072 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
22073 return ix86_stringop_alg;
22074 /* rep; movq or rep; movl is the smallest variant. */
22075 else if (!optimize_for_speed)
22077 if (!count || (count & 3))
22078 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
22080 return rep_prefix_usable ? rep_prefix_4_byte : loop;
22082 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
22084 else if (expected_size != -1 && expected_size < 4)
22085 return loop_1_byte;
22086 else if (expected_size != -1)
22089 enum stringop_alg alg = libcall;
22090 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
22092 /* We get here if the algorithms that were not libcall-based
22093 were rep-prefix based and we are unable to use rep prefixes
22094 based on global register usage. Break out of the loop and
22095 use the heuristic below. */
22096 if (algs->size[i].max == 0)
22098 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
22100 enum stringop_alg candidate = algs->size[i].alg;
22102 if (candidate != libcall && ALG_USABLE_P (candidate))
22104 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
22105 last non-libcall inline algorithm. */
22106 if (TARGET_INLINE_ALL_STRINGOPS)
22108 /* When the current size is best copied by a libcall,
22109 but we are still forced to inline, run the heuristic below
22110 that will pick code for medium-sized blocks. */
22111 if (alg != libcall)
22115 else if (ALG_USABLE_P (candidate))
22119 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
22121 /* When asked to inline the call anyway, try to pick a meaningful choice.
22122 We look for the maximal size of block that is faster to copy by hand and
22123 take blocks of at most that size, guessing that the average size will
22124 be roughly half of the block.
22126 If this turns out to be bad, we might simply specify the preferred
22127 choice in ix86_costs. */
22128 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22129 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
22132 enum stringop_alg alg;
22134 bool any_alg_usable_p = true;
22136 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
22138 enum stringop_alg candidate = algs->size[i].alg;
22139 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
22141 if (candidate != libcall && candidate
22142 && ALG_USABLE_P (candidate))
22143 max = algs->size[i].max;
22145 /* If there aren't any usable algorithms, then recursing on
22146 smaller sizes isn't going to find anything. Just return the
22147 simple byte-at-a-time copy loop. */
22148 if (!any_alg_usable_p)
22150 /* Pick something reasonable. */
22151 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22152 *dynamic_check = 128;
22153 return loop_1_byte;
22157 alg = decide_alg (count, max / 2, memset, dynamic_check);
22158 gcc_assert (*dynamic_check == -1);
22159 gcc_assert (alg != libcall);
22160 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22161 *dynamic_check = max;
22164 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
22165 #undef ALG_USABLE_P
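/* Illustrative sketch (ours, not GCC's data structure) of the size-table
   walk performed above: entries are tried in order and the first whose
   MAX covers the expected size supplies the algorithm; a MAX of -1 marks
   the final, unbounded entry.  */

struct sketch_stringop_entry
{
  int max;   /* largest block this entry covers, -1 if unbounded */
  int alg;   /* algorithm enumerator */
};

static int
sketch_pick_alg (const struct sketch_stringop_entry *table, int n,
                 int expected_size)
{
  int i;

  for (i = 0; i < n; i++)
    if (table[i].max == -1 || table[i].max >= expected_size)
      return table[i].alg;
  return -1;   /* nothing matched: fall back to a libcall */
}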
22168 /* Decide on alignment. We know that the operand is already aligned to ALIGN
22169 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
22171 decide_alignment (int align,
22172 enum stringop_alg alg,
22175 int desired_align = 0;
22179 gcc_unreachable ();
22181 case unrolled_loop:
22182 desired_align = GET_MODE_SIZE (Pmode);
22184 case rep_prefix_8_byte:
22187 case rep_prefix_4_byte:
22188 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
22189 copying a whole cache line at once. */
22190 if (TARGET_PENTIUMPRO)
22195 case rep_prefix_1_byte:
22196 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
22197 copying a whole cache line at once. */
22198 if (TARGET_PENTIUMPRO)
22212 if (desired_align < align)
22213 desired_align = align;
22214 if (expected_size != -1 && expected_size < 4)
22215 desired_align = align;
22216 return desired_align;
22219 /* Return the smallest power of 2 greater than VAL. */
22221 smallest_pow2_greater_than (int val)
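{
  /* (A conventional sketch of the body, ours: double RET until it
     exceeds VAL.)  */
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}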
22229 /* Expand string move (memcpy) operation. Use i386 string operations
22230 when profitable. expand_setmem contains similar code. The code
22231 depends upon architecture, block size and alignment, but always has
22232 the same overall structure:
22234 1) Prologue guard: Conditional that jumps up to epilogues for small
22235 blocks that can be handled by the epilogue alone. This is faster
22236 but also needed for correctness, since the prologue assumes the block
22237 is larger than the desired alignment.
22239 Optional dynamic check for size and libcall for large
22240 blocks is emitted here too, with -minline-stringops-dynamically.
22242 2) Prologue: copy first few bytes in order to get destination
22243 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22244 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22245 copied. We emit either a jump tree on power of two sized
22246 blocks, or a byte loop.
22248 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22249 with specified algorithm.
22251 4) Epilogue: code copying tail of the block that is too small to be
22252 handled by main body (or up to size guarded by prologue guard). */
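/* A plain-C sketch (ours, not what GCC emits verbatim) of the four-step
   shape described above, assuming a 4-byte main loop (size_needed == 4),
   desired_align == 4, and pointers that fit in unsigned long.  */

static void
sketch_movmem (char *dst, const char *src, unsigned long count)
{
  /* 1) Prologue guard: small blocks go straight to the epilogue.  */
  if (count >= 4)
    {
      /* 2) Prologue: byte copies until DST is 4-byte aligned.  */
      while ((unsigned long) dst & 3)
        {
          *dst++ = *src++;
          count--;
        }
      /* 3) Main body: 4-byte chunks, standing in for rep movsl.  */
      for (; count >= 4; count -= 4)
        {
          *dst++ = *src++;
          *dst++ = *src++;
          *dst++ = *src++;
          *dst++ = *src++;
        }
    }
  /* 4) Epilogue: at most size_needed - 1 remaining bytes.  */
  while (count > 0)
    {
      *dst++ = *src++;
      count--;
    }
}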
22255 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
22256 rtx expected_align_exp, rtx expected_size_exp)
22262 rtx jump_around_label = NULL;
22263 HOST_WIDE_INT align = 1;
22264 unsigned HOST_WIDE_INT count = 0;
22265 HOST_WIDE_INT expected_size = -1;
22266 int size_needed = 0, epilogue_size_needed;
22267 int desired_align = 0, align_bytes = 0;
22268 enum stringop_alg alg;
22270 bool need_zero_guard = false;
22272 if (CONST_INT_P (align_exp))
22273 align = INTVAL (align_exp);
22274 /* i386 can do misaligned access at a reasonably increased cost. */
22275 if (CONST_INT_P (expected_align_exp)
22276 && INTVAL (expected_align_exp) > align)
22277 align = INTVAL (expected_align_exp);
22278 /* ALIGN is the minimum of destination and source alignment, but we care here
22279 just about destination alignment. */
22280 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
22281 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
22283 if (CONST_INT_P (count_exp))
22284 count = expected_size = INTVAL (count_exp);
22285 if (CONST_INT_P (expected_size_exp) && count == 0)
22286 expected_size = INTVAL (expected_size_exp);
22288 /* Make sure we don't need to care about overflow later on. */
22289 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
22292 /* Step 0: Decide on preferred algorithm, desired alignment and
22293 size of chunks to be copied by main loop. */
22295 alg = decide_alg (count, expected_size, false, &dynamic_check);
22296 desired_align = decide_alignment (align, alg, expected_size);
22298 if (!TARGET_ALIGN_STRINGOPS)
22299 align = desired_align;
22301 if (alg == libcall)
22303 gcc_assert (alg != no_stringop);
22305 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
22306 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
22307 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
22312 gcc_unreachable ();
22314 need_zero_guard = true;
22315 size_needed = GET_MODE_SIZE (Pmode);
22317 case unrolled_loop:
22318 need_zero_guard = true;
22319 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
22321 case rep_prefix_8_byte:
22324 case rep_prefix_4_byte:
22327 case rep_prefix_1_byte:
22331 need_zero_guard = true;
22336 epilogue_size_needed = size_needed;
22338 /* Step 1: Prologue guard. */
22340 /* Alignment code needs count to be in register. */
22341 if (CONST_INT_P (count_exp) && desired_align > align)
22343 if (INTVAL (count_exp) > desired_align
22344 && INTVAL (count_exp) > size_needed)
22347 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22348 if (align_bytes <= 0)
22351 align_bytes = desired_align - align_bytes;
22353 if (align_bytes == 0)
22354 count_exp = force_reg (counter_mode (count_exp), count_exp);
22356 gcc_assert (desired_align >= 1 && align >= 1);
22358 /* Ensure that alignment prologue won't copy past end of block. */
22359 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22361 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
22362 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
22363 Make sure it is a power of 2. */
22364 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
22368 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
22370 /* If main algorithm works on QImode, no epilogue is needed.
22371 For small sizes just don't align anything. */
22372 if (size_needed == 1)
22373 desired_align = align;
22380 label = gen_label_rtx ();
22381 emit_cmp_and_jump_insns (count_exp,
22382 GEN_INT (epilogue_size_needed),
22383 LTU, 0, counter_mode (count_exp), 1, label);
22384 if (expected_size == -1 || expected_size < epilogue_size_needed)
22385 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22387 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22391 /* Emit code to decide at runtime whether a library call or inline code should be used. */
22393 if (dynamic_check != -1)
22395 if (CONST_INT_P (count_exp))
22397 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
22399 emit_block_move_via_libcall (dst, src, count_exp, false);
22400 count_exp = const0_rtx;
22406 rtx hot_label = gen_label_rtx ();
22407 jump_around_label = gen_label_rtx ();
22408 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22409 LEU, 0, GET_MODE (count_exp), 1, hot_label);
22410 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22411 emit_block_move_via_libcall (dst, src, count_exp, false);
22412 emit_jump (jump_around_label);
22413 emit_label (hot_label);
22417 /* Step 2: Alignment prologue. */
22419 if (desired_align > align)
22421 if (align_bytes == 0)
22423 /* Except for the first move in the epilogue, we no longer know
22424 the constant offset in the aliasing info. It doesn't seem worth
22425 the pain to maintain it for the first move, so throw away
22427 src = change_address (src, BLKmode, srcreg);
22428 dst = change_address (dst, BLKmode, destreg);
22429 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
22434 /* If we know how many bytes need to be stored before dst is
22435 sufficiently aligned, maintain aliasing info accurately. */
22436 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
22437 desired_align, align_bytes);
22438 count_exp = plus_constant (count_exp, -align_bytes);
22439 count -= align_bytes;
22441 if (need_zero_guard
22442 && (count < (unsigned HOST_WIDE_INT) size_needed
22443 || (align_bytes == 0
22444 && count < ((unsigned HOST_WIDE_INT) size_needed
22445 + desired_align - align))))
22447 /* It is possible that we copied enough so the main loop will not
22449 gcc_assert (size_needed > 1);
22450 if (label == NULL_RTX)
22451 label = gen_label_rtx ();
22452 emit_cmp_and_jump_insns (count_exp,
22453 GEN_INT (size_needed),
22454 LTU, 0, counter_mode (count_exp), 1, label);
22455 if (expected_size == -1
22456 || expected_size < (desired_align - align) / 2 + size_needed)
22457 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22459 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22462 if (label && size_needed == 1)
22464 emit_label (label);
22465 LABEL_NUSES (label) = 1;
22467 epilogue_size_needed = 1;
22469 else if (label == NULL_RTX)
22470 epilogue_size_needed = size_needed;
22472 /* Step 3: Main loop. */
22478 gcc_unreachable ();
22480 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22481 count_exp, QImode, 1, expected_size);
22484 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22485 count_exp, Pmode, 1, expected_size);
22487 case unrolled_loop:
22488 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
22489 registers for 4 temporaries anyway. */
22490 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22491 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
22494 case rep_prefix_8_byte:
22495 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22498 case rep_prefix_4_byte:
22499 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22502 case rep_prefix_1_byte:
22503 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22507 /* Properly adjust the offset of src and dest memory for aliasing. */
22508 if (CONST_INT_P (count_exp))
22510 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
22511 (count / size_needed) * size_needed);
22512 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
22513 (count / size_needed) * size_needed);
22517 src = change_address (src, BLKmode, srcreg);
22518 dst = change_address (dst, BLKmode, destreg);
22521 /* Step 4: Epilogue to copy the remaining bytes. */
22525 /* When the main loop is done, COUNT_EXP might hold original count,
22526 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
22527 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
22528 bytes. Compensate if needed. */
22530 if (size_needed < epilogue_size_needed)
22533 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22534 GEN_INT (size_needed - 1), count_exp, 1,
22536 if (tmp != count_exp)
22537 emit_move_insn (count_exp, tmp);
22539 emit_label (label);
22540 LABEL_NUSES (label) = 1;
22543 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22544 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
22545 epilogue_size_needed);
22546 if (jump_around_label)
22547 emit_label (jump_around_label);
22551 /* Helper function for memset. For QImode value 0xXY produce
22552 0xXYXYXYXY of the width specified by MODE. This is essentially
22553 a * 0x01010101, but we can do slightly better than
22554 synth_mult by unwinding the sequence by hand on CPUs with
22557 promote_duplicated_reg (enum machine_mode mode, rtx val)
22559 enum machine_mode valmode = GET_MODE (val);
22561 int nops = mode == DImode ? 3 : 2;
22563 gcc_assert (mode == SImode || mode == DImode);
22564 if (val == const0_rtx)
22565 return copy_to_mode_reg (mode, const0_rtx);
22566 if (CONST_INT_P (val))
22568 HOST_WIDE_INT v = INTVAL (val) & 255;
22572 if (mode == DImode)
22573 v |= (v << 16) << 16;
22574 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
22577 if (valmode == VOIDmode)
22579 if (valmode != QImode)
22580 val = gen_lowpart (QImode, val);
22581 if (mode == QImode)
22583 if (!TARGET_PARTIAL_REG_STALL)
22585 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
22586 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
22587 <= (ix86_cost->shift_const + ix86_cost->add) * nops
22588 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
22590 rtx reg = convert_modes (mode, QImode, val, true);
22591 tmp = promote_duplicated_reg (mode, const1_rtx);
22592 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
22597 rtx reg = convert_modes (mode, QImode, val, true);
22599 if (!TARGET_PARTIAL_REG_STALL)
22600 if (mode == SImode)
22601 emit_insn (gen_movsi_insv_1 (reg, reg));
22603 emit_insn (gen_movdi_insv_1 (reg, reg));
22606 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
22607 NULL, 1, OPTAB_DIRECT);
22609 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
22611 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
22612 NULL, 1, OPTAB_DIRECT);
22613 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
22614 if (mode == SImode)
22616 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
22617 NULL, 1, OPTAB_DIRECT);
22618 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
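/* For reference, a plain-C rendering (ours) of the shift/IOR sequence
   above for the SImode case, assuming a 32-bit unsigned int.  */

static unsigned int
sketch_replicate_byte_si (unsigned char v)
{
  unsigned int r = v;

  r |= r << 8;    /* 0x0000XYXY */
  r |= r << 16;   /* 0xXYXYXYXY */
  return r;
}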
22623 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
22624 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
22625 alignment from ALIGN to DESIRED_ALIGN. */
22627 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
22632 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
22633 promoted_val = promote_duplicated_reg (DImode, val);
22634 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
22635 promoted_val = promote_duplicated_reg (SImode, val);
22636 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
22637 promoted_val = promote_duplicated_reg (HImode, val);
22639 promoted_val = val;
22641 return promoted_val;
22644 /* Expand string clear operation (bzero). Use i386 string operations when
22645 profitable. See expand_movmem comment for explanation of individual
22646 steps performed. */
22648 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
22649 rtx expected_align_exp, rtx expected_size_exp)
22654 rtx jump_around_label = NULL;
22655 HOST_WIDE_INT align = 1;
22656 unsigned HOST_WIDE_INT count = 0;
22657 HOST_WIDE_INT expected_size = -1;
22658 int size_needed = 0, epilogue_size_needed;
22659 int desired_align = 0, align_bytes = 0;
22660 enum stringop_alg alg;
22661 rtx promoted_val = NULL;
22662 bool force_loopy_epilogue = false;
22664 bool need_zero_guard = false;
22666 if (CONST_INT_P (align_exp))
22667 align = INTVAL (align_exp);
22668 /* i386 can do misaligned access at a reasonably increased cost. */
22669 if (CONST_INT_P (expected_align_exp)
22670 && INTVAL (expected_align_exp) > align)
22671 align = INTVAL (expected_align_exp);
22672 if (CONST_INT_P (count_exp))
22673 count = expected_size = INTVAL (count_exp);
22674 if (CONST_INT_P (expected_size_exp) && count == 0)
22675 expected_size = INTVAL (expected_size_exp);
22677 /* Make sure we don't need to care about overflow later on. */
22678 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
22681 /* Step 0: Decide on preferred algorithm, desired alignment and
22682 size of chunks to be copied by main loop. */
22684 alg = decide_alg (count, expected_size, true, &dynamic_check);
22685 desired_align = decide_alignment (align, alg, expected_size);
22687 if (!TARGET_ALIGN_STRINGOPS)
22688 align = desired_align;
22690 if (alg == libcall)
22692 gcc_assert (alg != no_stringop);
22694 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
22695 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
22700 gcc_unreachable ();
22702 need_zero_guard = true;
22703 size_needed = GET_MODE_SIZE (Pmode);
22705 case unrolled_loop:
22706 need_zero_guard = true;
22707 size_needed = GET_MODE_SIZE (Pmode) * 4;
22709 case rep_prefix_8_byte:
22712 case rep_prefix_4_byte:
22715 case rep_prefix_1_byte:
22719 need_zero_guard = true;
22723 epilogue_size_needed = size_needed;
22725 /* Step 1: Prologue guard. */
22727 /* Alignment code needs count to be in register. */
22728 if (CONST_INT_P (count_exp) && desired_align > align)
22730 if (INTVAL (count_exp) > desired_align
22731 && INTVAL (count_exp) > size_needed)
22734 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22735 if (align_bytes <= 0)
22738 align_bytes = desired_align - align_bytes;
22740 if (align_bytes == 0)
22742 enum machine_mode mode = SImode;
22743 if (TARGET_64BIT && (count & ~0xffffffff))
22745 count_exp = force_reg (mode, count_exp);
22748 /* Do the cheap promotion to allow better CSE across the
22749 main loop and epilogue (i.e., one load of the big constant in
22750 front of all the code). */
22751 if (CONST_INT_P (val_exp))
22752 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
22753 desired_align, align);
22754 /* Ensure that alignment prologue won't copy past end of block. */
22755 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22757 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
22758 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
22759 Make sure it is a power of 2. */
22760 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
22762 /* To improve performance of small blocks, we jump around the VAL
22763 promotion. This means that if the promoted VAL is not constant,
22764 we might not use it in the epilogue and have to use a byte
22766 if (epilogue_size_needed > 2 && !promoted_val)
22767 force_loopy_epilogue = true;
22770 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
22772 /* If main algorithm works on QImode, no epilogue is needed.
22773 For small sizes just don't align anything. */
22774 if (size_needed == 1)
22775 desired_align = align;
22782 label = gen_label_rtx ();
22783 emit_cmp_and_jump_insns (count_exp,
22784 GEN_INT (epilogue_size_needed),
22785 LTU, 0, counter_mode (count_exp), 1, label);
22786 if (expected_size == -1 || expected_size <= epilogue_size_needed)
22787 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22789 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22792 if (dynamic_check != -1)
22794 rtx hot_label = gen_label_rtx ();
22795 jump_around_label = gen_label_rtx ();
22796 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22797 LEU, 0, counter_mode (count_exp), 1, hot_label);
22798 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22799 set_storage_via_libcall (dst, count_exp, val_exp, false);
22800 emit_jump (jump_around_label);
22801 emit_label (hot_label);
22804 /* Step 2: Alignment prologue. */
22806 /* Do the expensive promotion once we have branched off the small blocks. */
22808 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
22809 desired_align, align);
22810 gcc_assert (desired_align >= 1 && align >= 1);
22812 if (desired_align > align)
22814 if (align_bytes == 0)
22816 /* Except for the first move in the epilogue, we no longer know
22817 the constant offset in the aliasing info. It doesn't seem worth
22818 the pain to maintain it for the first move, so throw away
22820 dst = change_address (dst, BLKmode, destreg);
22821 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
22826 /* If we know how many bytes need to be stored before dst is
22827 sufficiently aligned, maintain aliasing info accurately. */
22828 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
22829 desired_align, align_bytes);
22830 count_exp = plus_constant (count_exp, -align_bytes);
22831 count -= align_bytes;
22833 if (need_zero_guard
22834 && (count < (unsigned HOST_WIDE_INT) size_needed
22835 || (align_bytes == 0
22836 && count < ((unsigned HOST_WIDE_INT) size_needed
22837 + desired_align - align))))
22839 /* It is possible that we copied enough so the main loop will not
22841 gcc_assert (size_needed > 1);
22842 if (label == NULL_RTX)
22843 label = gen_label_rtx ();
22844 emit_cmp_and_jump_insns (count_exp,
22845 GEN_INT (size_needed),
22846 LTU, 0, counter_mode (count_exp), 1, label);
22847 if (expected_size == -1
22848 || expected_size < (desired_align - align) / 2 + size_needed)
22849 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22851 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22854 if (label && size_needed == 1)
22856 emit_label (label);
22857 LABEL_NUSES (label) = 1;
22859 promoted_val = val_exp;
22860 epilogue_size_needed = 1;
22862 else if (label == NULL_RTX)
22863 epilogue_size_needed = size_needed;
22865 /* Step 3: Main loop. */
22871 gcc_unreachable ();
22873 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22874 count_exp, QImode, 1, expected_size);
22877 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22878 count_exp, Pmode, 1, expected_size);
22880 case unrolled_loop:
22881 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22882 count_exp, Pmode, 4, expected_size);
22884 case rep_prefix_8_byte:
22885 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22888 case rep_prefix_4_byte:
22889 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22892 case rep_prefix_1_byte:
22893 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22897 /* Properly adjust the offset of the destination memory for aliasing. */
22898 if (CONST_INT_P (count_exp))
22899 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
22900 (count / size_needed) * size_needed);
22902 dst = change_address (dst, BLKmode, destreg);
22904 /* Step 4: Epilogue to copy the remaining bytes. */
22908 /* When the main loop is done, COUNT_EXP might hold original count,
22909 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
22910 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
22911 bytes. Compensate if needed. */
22913 if (size_needed < epilogue_size_needed)
22916 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22917 GEN_INT (size_needed - 1), count_exp, 1,
22919 if (tmp != count_exp)
22920 emit_move_insn (count_exp, tmp);
22922 emit_label (label);
22923 LABEL_NUSES (label) = 1;
22926 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22928 if (force_loopy_epilogue)
22929 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
22930 epilogue_size_needed);
22932 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
22933 epilogue_size_needed);
22935 if (jump_around_label)
22936 emit_label (jump_around_label);
22940 /* Expand the appropriate insns for doing strlen if not just doing
22943 out = result, initialized with the start address
22944 align_rtx = alignment of the address.
22945 scratch = scratch register, initialized with the start address when
22946 not aligned, otherwise undefined
22948 This is just the body. It needs the initializations mentioned above and
22949 some address computing at the end. These things are done in i386.md. */
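/* Rough C equivalent (ours, a sketch only) of what this routine emits
   for unknown alignment; it returns the address of the terminating
   zero, matching the convention described above.  It assumes pointers
   fit in unsigned long and that unsigned int is 32 bits.  */

static const char *
sketch_strlen_end (const char *s)
{
  unsigned int w;

  /* Byte loop until S is 4-byte aligned.  */
  while ((unsigned long) s & 3)
    {
      if (*s == 0)
        return s;
      s++;
    }
  /* Four bytes at a time, using the zero-byte formula noted below.  */
  do
    {
      w = *(const unsigned int *) s;
      s += 4;
    }
  while (((w - 0x01010101u) & ~w & 0x80808080u) == 0);
  /* Back up and find the exact zero byte (the emitted code does this
     branchlessly).  */
  s -= 4;
  while (*s)
    s++;
  return s;
}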
22952 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
22956 rtx align_2_label = NULL_RTX;
22957 rtx align_3_label = NULL_RTX;
22958 rtx align_4_label = gen_label_rtx ();
22959 rtx end_0_label = gen_label_rtx ();
22961 rtx tmpreg = gen_reg_rtx (SImode);
22962 rtx scratch = gen_reg_rtx (SImode);
22966 if (CONST_INT_P (align_rtx))
22967 align = INTVAL (align_rtx);
22969 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
22971 /* Is there a known alignment and is it less than 4? */
22974 rtx scratch1 = gen_reg_rtx (Pmode);
22975 emit_move_insn (scratch1, out);
22976 /* Is there a known alignment and is it not 2? */
22979 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
22980 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
22982 /* Leave just the 3 lower bits. */
22983 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
22984 NULL_RTX, 0, OPTAB_WIDEN);
22986 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22987 Pmode, 1, align_4_label);
22988 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
22989 Pmode, 1, align_2_label);
22990 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
22991 Pmode, 1, align_3_label);
22995 /* Since the alignment is 2, we have to check 2 or 0 bytes;
22996 check whether it is aligned to 4 bytes. */
22998 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
22999 NULL_RTX, 0, OPTAB_WIDEN);
23001 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
23002 Pmode, 1, align_4_label);
23005 mem = change_address (src, QImode, out);
23007 /* Now compare the bytes. */
23009 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
23010 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
23011 QImode, 1, end_0_label);
23013 /* Increment the address. */
23014 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
23016 /* Not needed with an alignment of 2 */
23019 emit_label (align_2_label);
23021 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
23024 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
23026 emit_label (align_3_label);
23029 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
23032 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
23035 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
23036 align this loop; it only makes programs huge and does not help to
23038 emit_label (align_4_label);
23040 mem = change_address (src, SImode, out);
23041 emit_move_insn (scratch, mem);
23042 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
23044 /* This formula yields a nonzero result iff one of the bytes is zero.
23045 This saves three branches inside the loop and many cycles. */
23047 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
23048 emit_insn (gen_one_cmplsi2 (scratch, scratch));
23049 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
23050 emit_insn (gen_andsi3 (tmpreg, tmpreg,
23051 gen_int_mode (0x80808080, SImode)));
23052 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
23057 rtx reg = gen_reg_rtx (SImode);
23058 rtx reg2 = gen_reg_rtx (Pmode);
23059 emit_move_insn (reg, tmpreg);
23060 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
23062 /* If zero is not in the first two bytes, move two bytes forward. */
23063 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
23064 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
23065 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
23066 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
23067 gen_rtx_IF_THEN_ELSE (SImode, tmp,
23070 /* Emit lea manually to avoid clobbering of flags. */
23071 emit_insn (gen_rtx_SET (SImode, reg2,
23072 gen_rtx_PLUS (Pmode, out, const2_rtx)));
23074 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
23075 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
23076 emit_insn (gen_rtx_SET (VOIDmode, out,
23077 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
23083 rtx end_2_label = gen_label_rtx ();
23084 /* Is zero in the first two bytes? */
23086 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
23087 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
23088 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
23089 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
23090 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
23092 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
23093 JUMP_LABEL (tmp) = end_2_label;
23095 /* Not in the first two. Move two bytes forward. */
23096 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
23097 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
23099 emit_label (end_2_label);
23103 /* Avoid branch in fixing the byte. */
23104 tmpreg = gen_lowpart (QImode, tmpreg);
23105 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
23106 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
23107 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
23108 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
23110 emit_label (end_0_label);
23113 /* Expand strlen. */
23116 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
23118 rtx addr, scratch1, scratch2, scratch3, scratch4;
23120 /* The generic case of the strlen expander is long. Avoid its
23121 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
23123 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
23124 && !TARGET_INLINE_ALL_STRINGOPS
23125 && !optimize_insn_for_size_p ()
23126 && (!CONST_INT_P (align) || INTVAL (align) < 4))
23129 addr = force_reg (Pmode, XEXP (src, 0));
23130 scratch1 = gen_reg_rtx (Pmode);
23132 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
23133 && !optimize_insn_for_size_p ())
23135 /* It seems that some optimizers do not combine a call like
23136 foo(strlen(bar), strlen(bar));
23137 when the move and the subtraction are done here. The length is
23138 calculated just once when these instructions are done inside of
23139 output_strlen_unroll(). But since &bar[strlen(bar)] is
23140 often used, and this uses one fewer register for the lifetime of
23141 output_strlen_unroll(), this is better. */
23143 emit_move_insn (out, addr);
23145 ix86_expand_strlensi_unroll_1 (out, src, align);
23147 /* strlensi_unroll_1 returns the address of the zero at the end of
23148 the string, like memchr(), so compute the length by subtracting
23149 the start address. */
23150 emit_insn (ix86_gen_sub3 (out, out, addr));
23156 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23157 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
23160 scratch2 = gen_reg_rtx (Pmode);
23161 scratch3 = gen_reg_rtx (Pmode);
23162 scratch4 = force_reg (Pmode, constm1_rtx);
23164 emit_move_insn (scratch3, addr);
23165 eoschar = force_reg (QImode, eoschar);
23167 src = replace_equiv_address_nv (src, scratch3);
23169 /* If .md starts supporting :P, this can be done in .md. */
23170 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
23171 scratch4), UNSPEC_SCAS);
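/* Arithmetic note (ours): with SCRATCH4 = -1 as the count register,
   repnz scasb scans LEN + 1 bytes (including the terminator) and leaves
   the counter at -(LEN + 2); the one's complement and add of -1 below
   therefore recover LEN, since ~(-(LEN + 2)) - 1 == (LEN + 1) - 1.  */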
23172 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
23173 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
23174 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
23179 /* For a given symbol (function), construct code to compute the address of its PLT
23180 entry in the large x86-64 PIC model. */
23182 construct_plt_address (rtx symbol)
23184 rtx tmp = gen_reg_rtx (Pmode);
23185 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
23187 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
23188 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
23190 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
23191 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
23196 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
23198 rtx pop, bool sibcall)
23200 /* We need to represent that SI and DI registers are clobbered
23202 static int clobbered_registers[] = {
23203 XMM6_REG, XMM7_REG, XMM8_REG,
23204 XMM9_REG, XMM10_REG, XMM11_REG,
23205 XMM12_REG, XMM13_REG, XMM14_REG,
23206 XMM15_REG, SI_REG, DI_REG
23208 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
23209 rtx use = NULL, call;
23210 unsigned int vec_len;
23212 if (pop == const0_rtx)
23214 gcc_assert (!TARGET_64BIT || !pop);
23216 if (TARGET_MACHO && !TARGET_64BIT)
23219 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
23220 fnaddr = machopic_indirect_call_target (fnaddr);
23225 /* Static functions and indirect calls don't need the pic register. */
23226 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
23227 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23228 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
23229 use_reg (&use, pic_offset_table_rtx);
23232 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
23234 rtx al = gen_rtx_REG (QImode, AX_REG);
23235 emit_move_insn (al, callarg2);
23236 use_reg (&use, al);
23239 if (ix86_cmodel == CM_LARGE_PIC
23241 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23242 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
23243 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
23245 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
23246 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
23248 fnaddr = XEXP (fnaddr, 0);
23249 if (GET_MODE (fnaddr) != Pmode)
23250 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
23251 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
23255 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
23257 call = gen_rtx_SET (VOIDmode, retval, call);
23258 vec[vec_len++] = call;
23262 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
23263 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
23264 vec[vec_len++] = pop;
23267 if (TARGET_64BIT_MS_ABI
23268 && (!callarg2 || INTVAL (callarg2) != -2))
23272 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
23273 UNSPEC_MS_TO_SYSV_CALL);
23275 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
23277 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
23279 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
23281 clobbered_registers[i]));
23284 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
23285 if (TARGET_VZEROUPPER)
23288 if (cfun->machine->callee_pass_avx256_p)
23290 if (cfun->machine->callee_return_avx256_p)
23291 avx256 = callee_return_pass_avx256;
23293 avx256 = callee_pass_avx256;
23295 else if (cfun->machine->callee_return_avx256_p)
23296 avx256 = callee_return_avx256;
23298 avx256 = call_no_avx256;
23300 if (reload_completed)
23301 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
23303 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
23304 gen_rtvec (1, GEN_INT (avx256)),
23305 UNSPEC_CALL_NEEDS_VZEROUPPER);
23309 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
23310 call = emit_call_insn (call);
23312 CALL_INSN_FUNCTION_USAGE (call) = use;
23318 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
23320 rtx pat = PATTERN (insn);
23321 rtvec vec = XVEC (pat, 0);
23322 int len = GET_NUM_ELEM (vec) - 1;
23324 /* Strip off the last entry of the parallel. */
23325 gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
23326 gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
23328 pat = RTVEC_ELT (vec, 0);
23330 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
23332 emit_insn (gen_avx_vzeroupper (vzeroupper));
23333 emit_call_insn (pat);
23336 /* Output the assembly for a call instruction. */
23339 ix86_output_call_insn (rtx insn, rtx call_op)
23341 bool direct_p = constant_call_address_operand (call_op, Pmode);
23342 bool seh_nop_p = false;
23345 if (SIBLING_CALL_P (insn))
23349 /* SEH epilogue detection requires the indirect branch case
23350 to include REX.W. */
23351 else if (TARGET_SEH)
23352 xasm = "rex.W jmp %A0";
23356 output_asm_insn (xasm, &call_op);
23360 /* SEH unwinding can require an extra nop to be emitted in several
23361 circumstances. Determine if we have one of those. */
23366 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
23368 /* If we get to another real insn, we don't need the nop. */
23372 /* If we get to the epilogue note, prevent a catch region from
23373 being adjacent to the standard epilogue sequence. With non-call
23374 exceptions, we'll have done this during epilogue emission. */
23375 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
23376 && !flag_non_call_exceptions
23377 && !can_throw_internal (insn))
23384 /* If we didn't find a real insn following the call, prevent the
23385 unwinder from looking into the next function. */
23391 xasm = "call\t%P0";
23393 xasm = "call\t%A0";
23395 output_asm_insn (xasm, &call_op);
23403 /* Clear stack slot assignments remembered from previous functions.
23404 This is called from INIT_EXPANDERS once before RTL is emitted for each
23407 static struct machine_function *
23408 ix86_init_machine_status (void)
23410 struct machine_function *f;
23412 f = ggc_alloc_cleared_machine_function ();
23413 f->use_fast_prologue_epilogue_nregs = -1;
23414 f->call_abi = ix86_abi;
23419 /* Return a MEM corresponding to a stack slot with mode MODE.
23420 Allocate a new slot if necessary.
23422 The RTL for a function can have several slots available: N is
23423 which slot to use. */
23426 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
23428 struct stack_local_entry *s;
23430 gcc_assert (n < MAX_386_STACK_LOCALS);
23432 for (s = ix86_stack_locals; s; s = s->next)
23433 if (s->mode == mode && s->n == n)
23434 return validize_mem (copy_rtx (s->rtl));
23436 s = ggc_alloc_stack_local_entry ();
23439 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
23441 s->next = ix86_stack_locals;
23442 ix86_stack_locals = s;
23443 return validize_mem (s->rtl);
23447 ix86_instantiate_decls (void)
23449 struct stack_local_entry *s;
23451 for (s = ix86_stack_locals; s; s = s->next)
23452 if (s->rtl != NULL_RTX)
23453 instantiate_decl_rtl (s->rtl);
23456 /* Calculate the length of the memory address in the instruction encoding.
23457 This includes the addr32 prefix but not the one-byte modrm, opcode,
23458 or other prefixes. We never generate an addr32 prefix for the LEA insn. */
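/* A few illustrative values of the result (ours; assuming no segment
   override and no addr32 prefix): (%eax) -> 0, (%esp) -> 1 (SIB byte),
   (%ebp) -> 1 (disp8), 4(%eax) -> 1, 0x1000(%eax) -> 4,
   (%eax,%ebx,4) -> 1 (SIB byte).  */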
23461 memory_address_length (rtx addr, bool lea)
23463 struct ix86_address parts;
23464 rtx base, index, disp;
23468 if (GET_CODE (addr) == PRE_DEC
23469 || GET_CODE (addr) == POST_INC
23470 || GET_CODE (addr) == PRE_MODIFY
23471 || GET_CODE (addr) == POST_MODIFY)
23474 ok = ix86_decompose_address (addr, &parts);
23477 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
23479 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
23480 if (TARGET_64BIT && !lea
23481 && (SImode_address_operand (addr, VOIDmode)
23482 || (parts.base && GET_MODE (parts.base) == SImode)
23483 || (parts.index && GET_MODE (parts.index) == SImode)))
23487 index = parts.index;
23490 if (base && GET_CODE (base) == SUBREG)
23491 base = SUBREG_REG (base);
23492 if (index && GET_CODE (index) == SUBREG)
23493 index = SUBREG_REG (index);
23495 gcc_assert (base == NULL_RTX || REG_P (base));
23496 gcc_assert (index == NULL_RTX || REG_P (index));
23499 - esp as the base always wants an index,
23500 - ebp as the base always wants a displacement,
23501 - r12 as the base always wants an index,
23502 - r13 as the base always wants a displacement. */
23504 /* Register Indirect. */
23505 if (base && !index && !disp)
23507 /* esp (for its index) and ebp (for its displacement) need
23508 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
23510 if (base == arg_pointer_rtx
23511 || base == frame_pointer_rtx
23512 || REGNO (base) == SP_REG
23513 || REGNO (base) == BP_REG
23514 || REGNO (base) == R12_REG
23515 || REGNO (base) == R13_REG)
23519 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
23520 is not disp32, but disp32(%rip), so for disp32
23521 SIB byte is needed, unless print_operand_address
23522 optimizes it into disp32(%rip) or (%rip) is implied
23524 else if (disp && !base && !index)
23531 if (GET_CODE (disp) == CONST)
23532 symbol = XEXP (disp, 0);
23533 if (GET_CODE (symbol) == PLUS
23534 && CONST_INT_P (XEXP (symbol, 1)))
23535 symbol = XEXP (symbol, 0);
23537 if (GET_CODE (symbol) != LABEL_REF
23538 && (GET_CODE (symbol) != SYMBOL_REF
23539 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
23540 && (GET_CODE (symbol) != UNSPEC
23541 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
23542 && XINT (symbol, 1) != UNSPEC_PCREL
23543 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
23549 /* Find the length of the displacement constant. */
23552 if (base && satisfies_constraint_K (disp))
23557 /* ebp always wants a displacement. Similarly r13. */
23558 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
23561 /* An index requires the two-byte modrm form.... */
23563 /* ...like esp (or r12), which always wants an index. */
23564 || base == arg_pointer_rtx
23565 || base == frame_pointer_rtx
23566 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
23573 /* Compute default value for "length_immediate" attribute. When SHORTFORM
23574 is set, expect that the insn has an 8-bit immediate alternative. */
23576 ix86_attr_length_immediate_default (rtx insn, bool shortform)
23580 extract_insn_cached (insn);
23581 for (i = recog_data.n_operands - 1; i >= 0; --i)
23582 if (CONSTANT_P (recog_data.operand[i]))
23584 enum attr_mode mode = get_attr_mode (insn);
23587 if (shortform && CONST_INT_P (recog_data.operand[i]))
23589 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
23596 ival = trunc_int_for_mode (ival, HImode);
23599 ival = trunc_int_for_mode (ival, SImode);
23604 if (IN_RANGE (ival, -128, 127))
23621 /* Immediates for DImode instructions are encoded
23622 as 32bit sign extended values. */
23627 fatal_insn ("unknown insn mode", insn);
23633 /* Compute default value for "length_address" attribute. */
23635 ix86_attr_length_address_default (rtx insn)
23639 if (get_attr_type (insn) == TYPE_LEA)
23641 rtx set = PATTERN (insn), addr;
23643 if (GET_CODE (set) == PARALLEL)
23644 set = XVECEXP (set, 0, 0);
23646 gcc_assert (GET_CODE (set) == SET);
23648 addr = SET_SRC (set);
23650 return memory_address_length (addr, true);
23653 extract_insn_cached (insn);
23654 for (i = recog_data.n_operands - 1; i >= 0; --i)
23655 if (MEM_P (recog_data.operand[i]))
23657 constrain_operands_cached (reload_completed);
23658 if (which_alternative != -1)
23660 const char *constraints = recog_data.constraints[i];
23661 int alt = which_alternative;
23663 while (*constraints == '=' || *constraints == '+')
23666 while (*constraints++ != ',')
23668 /* Skip ignored operands. */
23669 if (*constraints == 'X')
23672 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
23677 /* Compute default value for "length_vex" attribute. It includes
23678 2 or 3 byte VEX prefix and 1 opcode byte. */
23681 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
23685 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit requires
23686 the 3-byte VEX prefix. */
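/* Illustrative totals (ours): a 0f-opcode insn with no REX.W and no
   extended registers gets the 2-byte VEX prefix, i.e. 2 + 1 = 3; REX.W,
   REX.X, REX.B or a non-0f opcode forces the 3-byte prefix, i.e.
   3 + 1 = 4.  */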
23687 if (!has_0f_opcode || has_vex_w)
23690 /* We can always use 2 byte VEX prefix in 32bit. */
23694 extract_insn_cached (insn);
23696 for (i = recog_data.n_operands - 1; i >= 0; --i)
23697 if (REG_P (recog_data.operand[i]))
23699 /* REX.W bit uses 3 byte VEX prefix. */
23700 if (GET_MODE (recog_data.operand[i]) == DImode
23701 && GENERAL_REG_P (recog_data.operand[i]))
23706 /* REX.X or REX.B bits use 3 byte VEX prefix. */
23707 if (MEM_P (recog_data.operand[i])
23708 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
23715 /* Return the maximum number of instructions a cpu can issue. */
23718 ix86_issue_rate (void)
23722 case PROCESSOR_PENTIUM:
23723 case PROCESSOR_ATOM:
23727 case PROCESSOR_PENTIUMPRO:
23728 case PROCESSOR_PENTIUM4:
23729 case PROCESSOR_CORE2_32:
23730 case PROCESSOR_CORE2_64:
23731 case PROCESSOR_COREI7_32:
23732 case PROCESSOR_COREI7_64:
23733 case PROCESSOR_ATHLON:
23735 case PROCESSOR_AMDFAM10:
23736 case PROCESSOR_NOCONA:
23737 case PROCESSOR_GENERIC32:
23738 case PROCESSOR_GENERIC64:
23739 case PROCESSOR_BDVER1:
23740 case PROCESSOR_BDVER2:
23741 case PROCESSOR_BTVER1:
23749 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
23750 by DEP_INSN and nothing else set by DEP_INSN. */
23753 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
23757 /* Simplify the test for uninteresting insns. */
23758 if (insn_type != TYPE_SETCC
23759 && insn_type != TYPE_ICMOV
23760 && insn_type != TYPE_FCMOV
23761 && insn_type != TYPE_IBR)
23764 if ((set = single_set (dep_insn)) != 0)
23766 set = SET_DEST (set);
23769 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
23770 && XVECLEN (PATTERN (dep_insn), 0) == 2
23771 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
23772 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
23774 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
23775 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
23780 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
23783 /* This test is true if the dependent insn reads the flags but
23784 not any other potentially set register. */
23785 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
23788 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
23794 /* Return true iff USE_INSN has a memory address with operands set by
23798 ix86_agi_dependent (rtx set_insn, rtx use_insn)
23801 extract_insn_cached (use_insn);
23802 for (i = recog_data.n_operands - 1; i >= 0; --i)
23803 if (MEM_P (recog_data.operand[i]))
23805 rtx addr = XEXP (recog_data.operand[i], 0);
23806 return modified_in_p (addr, set_insn) != 0;
23812 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
23814 enum attr_type insn_type, dep_insn_type;
23815 enum attr_memory memory;
23817 int dep_insn_code_number;
23819 /* Anti and output dependencies have zero cost on all CPUs. */
23820 if (REG_NOTE_KIND (link) != 0)
23823 dep_insn_code_number = recog_memoized (dep_insn);
23825 /* If we can't recognize the insns, we can't really do anything. */
23826 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
23829 insn_type = get_attr_type (insn);
23830 dep_insn_type = get_attr_type (dep_insn);
23834 case PROCESSOR_PENTIUM:
23835 /* Address Generation Interlock adds a cycle of latency. */
23836 if (insn_type == TYPE_LEA)
23838 rtx addr = PATTERN (insn);
23840 if (GET_CODE (addr) == PARALLEL)
23841 addr = XVECEXP (addr, 0, 0);
23843 gcc_assert (GET_CODE (addr) == SET);
23845 addr = SET_SRC (addr);
23846 if (modified_in_p (addr, dep_insn))
23849 else if (ix86_agi_dependent (dep_insn, insn))
23852 /* ??? Compares pair with jump/setcc. */
23853 if (ix86_flags_dependent (insn, dep_insn, insn_type))
23856 /* Floating point stores require value to be ready one cycle earlier. */
23857 if (insn_type == TYPE_FMOV
23858 && get_attr_memory (insn) == MEMORY_STORE
23859 && !ix86_agi_dependent (dep_insn, insn))
23863 case PROCESSOR_PENTIUMPRO:
23864 memory = get_attr_memory (insn);
23866 /* INT->FP conversion is expensive. */
23867 if (get_attr_fp_int_src (dep_insn))
23870 /* There is one cycle extra latency between an FP op and a store. */
23871 if (insn_type == TYPE_FMOV
23872 && (set = single_set (dep_insn)) != NULL_RTX
23873 && (set2 = single_set (insn)) != NULL_RTX
23874 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
23875 && MEM_P (SET_DEST (set2)))
23878 /* Model the ability of the reorder buffer to hide the latency of a load by
23879 executing it in parallel with the previous instruction when the
23880 previous instruction is not needed to compute the address. */
23881 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23882 && !ix86_agi_dependent (dep_insn, insn))
23884 /* Claim moves to take one cycle, as the core can issue one load
23885 at a time and the next load can start a cycle later. */
23886 if (dep_insn_type == TYPE_IMOV
23887 || dep_insn_type == TYPE_FMOV)
23895 memory = get_attr_memory (insn);
23897 /* The esp dependency is resolved before the instruction is really
23899 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
23900 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
23903 /* INT->FP conversion is expensive. */
23904 if (get_attr_fp_int_src (dep_insn))
23907 /* Model the ability of the reorder buffer to hide the latency of a load by
23908 executing it in parallel with the previous instruction when the
23909 previous instruction is not needed to compute the address. */
23910 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23911 && !ix86_agi_dependent (dep_insn, insn))
23913 /* Claim moves to take one cycle, as the core can issue one load
23914 at a time and the next load can start a cycle later. */
23915 if (dep_insn_type == TYPE_IMOV
23916 || dep_insn_type == TYPE_FMOV)
23925 case PROCESSOR_ATHLON:
23927 case PROCESSOR_AMDFAM10:
23928 case PROCESSOR_BDVER1:
23929 case PROCESSOR_BDVER2:
23930 case PROCESSOR_BTVER1:
23931 case PROCESSOR_ATOM:
23932 case PROCESSOR_GENERIC32:
23933 case PROCESSOR_GENERIC64:
23934 memory = get_attr_memory (insn);
23936 /* Model the ability of the reorder buffer to hide the latency of a load by
23937 executing it in parallel with the previous instruction when the
23938 previous instruction is not needed to compute the address. */
23939 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23940 && !ix86_agi_dependent (dep_insn, insn))
23942 enum attr_unit unit = get_attr_unit (insn);
23945 /* Because of the difference between the length of integer and
23946 floating unit pipeline preparation stages, the memory operands
23947 for floating point are cheaper.
23949 ??? For Athlon the difference is most probably 2. */
23950 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
23953 loadcost = TARGET_ATHLON ? 2 : 0;
23955 if (cost >= loadcost)
23968 /* How many alternative schedules to try. This should be as wide as the
23969 scheduling freedom in the DFA, but no wider. Making this value too
23970 large results in extra work for the scheduler. */
23973 ia32_multipass_dfa_lookahead (void)
23977 case PROCESSOR_PENTIUM:
23980 case PROCESSOR_PENTIUMPRO:
23984 case PROCESSOR_CORE2_32:
23985 case PROCESSOR_CORE2_64:
23986 case PROCESSOR_COREI7_32:
23987 case PROCESSOR_COREI7_64:
23988 case PROCESSOR_ATOM:
23989 /* Generally, we want haifa-sched:max_issue() to look ahead as far
23990 as the number of instructions that can be executed in a cycle, i.e.,
23991 issue_rate. I wonder why tuning for many CPUs does not do this. */
23992 return ix86_issue_rate ();
24001 /* Model the decoder of Core 2/i7.
24002 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
24003 track the instruction fetch block boundaries and make sure that long
24004 (9+ bytes) instructions are assigned to D0. */
24006 /* Maximum length of an insn that can be handled by
24007 a secondary decoder unit. '8' for Core 2/i7. */
24008 static int core2i7_secondary_decoder_max_insn_size;
24010 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
24011 '16' for Core 2/i7. */
24012 static int core2i7_ifetch_block_size;
24014 /* Maximum number of instructions decoder can handle per cycle.
24015 '6' for Core 2/i7. */
24016 static int core2i7_ifetch_block_max_insns;
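/* Sketch (ours) of the admission test the filter below applies to a
   candidate insn of SIZE bytes, given the current ifetch-block fill
   state; the constants are the Core 2/i7 values installed in
   ix86_sched_init_global below.  */

static int
sketch_decoder_accepts (int block_len, int block_n_insns, int size,
                        int first_cycle_insn_p)
{
  /* Secondary decoders handle insns of at most 8 bytes.  */
  if (!first_cycle_insn_p && size > 8)
    return 0;
  /* The insn must fit in the 16-byte ifetch block...  */
  if (block_len + size > 16)
    return 0;
  /* ... and at most 6 insns are decoded per cycle.  */
  if (block_n_insns + 1 > 6)
    return 0;
  return 1;
}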
24018 typedef struct ix86_first_cycle_multipass_data_ *
24019 ix86_first_cycle_multipass_data_t;
24020 typedef const struct ix86_first_cycle_multipass_data_ *
24021 const_ix86_first_cycle_multipass_data_t;
24023 /* A variable to store target state across calls to max_issue within
24025 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
24026 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
24028 /* Initialize DATA. */
24030 core2i7_first_cycle_multipass_init (void *_data)
24032 ix86_first_cycle_multipass_data_t data
24033 = (ix86_first_cycle_multipass_data_t) _data;
24035 data->ifetch_block_len = 0;
24036 data->ifetch_block_n_insns = 0;
24037 data->ready_try_change = NULL;
24038 data->ready_try_change_size = 0;
24041 /* Advancing the cycle; reset ifetch block counts. */
24043 core2i7_dfa_post_advance_cycle (void)
24045 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
24047 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
24049 data->ifetch_block_len = 0;
24050 data->ifetch_block_n_insns = 0;
24053 static int min_insn_size (rtx);
24055 /* Filter out insns from ready_try that the core will not be able to issue
24056 on current cycle due to decoder. */
24058 core2i7_first_cycle_multipass_filter_ready_try
24059 (const_ix86_first_cycle_multipass_data_t data,
24060 char *ready_try, int n_ready, bool first_cycle_insn_p)
24067 if (ready_try[n_ready])
24070 insn = get_ready_element (n_ready);
24071 insn_size = min_insn_size (insn);
24073 if (/* If this insn is too long for a secondary decoder ... */
24074 (!first_cycle_insn_p
24075 && insn_size > core2i7_secondary_decoder_max_insn_size)
24076 /* ... or it would not fit into the ifetch block ... */
24077 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
24078 /* ... or the decoder is full already ... */
24079 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
24080 /* ... mask the insn out. */
24082 ready_try[n_ready] = 1;
24084 if (data->ready_try_change)
24085 SET_BIT (data->ready_try_change, n_ready);
24090 /* Prepare for a new round of multipass lookahead scheduling. */
24092 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
24093 bool first_cycle_insn_p)
24095 ix86_first_cycle_multipass_data_t data
24096 = (ix86_first_cycle_multipass_data_t) _data;
24097 const_ix86_first_cycle_multipass_data_t prev_data
24098 = ix86_first_cycle_multipass_data;
24100 /* Restore the state from the end of the previous round. */
24101 data->ifetch_block_len = prev_data->ifetch_block_len;
24102 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
24104 /* Filter instructions that cannot be issued on current cycle due to
24105 decoder restrictions. */
24106 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
24107 first_cycle_insn_p);
/* INSN is being issued in the current solution.  Account for its impact on
   the decoder model.  */
static void
core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
				     rtx insn, const void *_prev_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = (const_ix86_first_cycle_multipass_data_t) _prev_data;

  int insn_size = min_insn_size (insn);

  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
	      && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
  if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
      data->ready_try_change_size = n_ready;
    }
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
					       n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  sbitmap_zero (data->ready_try_change);

  /* Filter out insns from ready_try that the core will not be able to issue
     on the current cycle due to the decoder.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  false);
}
/* Revert the effect on ready_try.  */
static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
					 char *ready_try,
					 int n_ready ATTRIBUTE_UNUSED)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  unsigned int i = 0;
  sbitmap_iterator sbi;

  gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}
24166 /* Save the result of multipass lookahead scheduling for the next round. */
24168 core2i7_first_cycle_multipass_end (const void *_data)
24170 const_ix86_first_cycle_multipass_data_t data
24171 = (const_ix86_first_cycle_multipass_data_t) _data;
24172 ix86_first_cycle_multipass_data_t next_data
24173 = ix86_first_cycle_multipass_data;
24177 next_data->ifetch_block_len = data->ifetch_block_len;
24178 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
24182 /* Deallocate target data. */
24184 core2i7_first_cycle_multipass_fini (void *_data)
24186 ix86_first_cycle_multipass_data_t data
24187 = (ix86_first_cycle_multipass_data_t) _data;
24189 if (data->ready_try_change)
24191 sbitmap_free (data->ready_try_change);
24192 data->ready_try_change = NULL;
24193 data->ready_try_change_size = 0;
24197 /* Prepare for scheduling pass. */
24199 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
24200 int verbose ATTRIBUTE_UNUSED,
24201 int max_uid ATTRIBUTE_UNUSED)
24203 /* Install scheduling hooks for current CPU. Some of these hooks are used
24204 in time-critical parts of the scheduler, so we only set them up when
24205 they are actually used. */
24208 case PROCESSOR_CORE2_32:
24209 case PROCESSOR_CORE2_64:
24210 case PROCESSOR_COREI7_32:
24211 case PROCESSOR_COREI7_64:
24212 targetm.sched.dfa_post_advance_cycle
24213 = core2i7_dfa_post_advance_cycle;
24214 targetm.sched.first_cycle_multipass_init
24215 = core2i7_first_cycle_multipass_init;
24216 targetm.sched.first_cycle_multipass_begin
24217 = core2i7_first_cycle_multipass_begin;
24218 targetm.sched.first_cycle_multipass_issue
24219 = core2i7_first_cycle_multipass_issue;
24220 targetm.sched.first_cycle_multipass_backtrack
24221 = core2i7_first_cycle_multipass_backtrack;
24222 targetm.sched.first_cycle_multipass_end
24223 = core2i7_first_cycle_multipass_end;
24224 targetm.sched.first_cycle_multipass_fini
24225 = core2i7_first_cycle_multipass_fini;
24227 /* Set decoder parameters. */
24228 core2i7_secondary_decoder_max_insn_size = 8;
24229 core2i7_ifetch_block_size = 16;
24230 core2i7_ifetch_block_max_insns = 6;
24234 targetm.sched.dfa_post_advance_cycle = NULL;
24235 targetm.sched.first_cycle_multipass_init = NULL;
24236 targetm.sched.first_cycle_multipass_begin = NULL;
24237 targetm.sched.first_cycle_multipass_issue = NULL;
24238 targetm.sched.first_cycle_multipass_backtrack = NULL;
24239 targetm.sched.first_cycle_multipass_end = NULL;
24240 targetm.sched.first_cycle_multipass_fini = NULL;
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
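/* Illustrative example (not from the original source): the DFmode constant
   in "static const double d = 1.0;" is given 64-bit alignment here even
   though the 32-bit ABI only requires 32 bits for double, so loads of it
   are never split; a string literal of 31 or more characters is
   word-aligned unless optimizing for size.  */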
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align
    = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
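/* Worked example (explanatory, not in the original): at -O2, max_align
   above is 256 bits, so a static "char buf[32]" (TYPE_SIZE of 256 bits)
   is bumped from 8-bit to 256-bit alignment, and on TARGET_64BIT any
   aggregate of at least 16 bytes gets at least 128 bits, which is what
   permits aligned SSE access to static arrays.  */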
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
		      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for a caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions on arrays.  The
     rule is meant for static storage (where the compiler cannot do the
     analysis by itself).  We follow it for automatic variables only when
     convenient.  We fully control everything in the function compiled, and
     functions from other units cannot rely on the alignment.

     Exclude the va_list type.  It is the common case of a local array where
     we cannot benefit from the alignment.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
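/* Illustrative example: in a function optimized for speed with -m64 and
   SSE enabled, a local "double spill[4]" (32 bytes, not a va_list) is
   given 128-bit stack alignment by the code above, so the vectorizer may
   use aligned 16-byte loads on it; a local of va_list type is deliberately
   excluded and keeps its natural alignment.  */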
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
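/* Example of the effect (explanatory note, not in the original): with
   -m32 -mpreferred-stack-boundary=2, a "long long" local would by default
   request 64-bit alignment and thus force dynamic realignment of the whole
   frame; reporting 32 bits here, in sync with ix86_local_alignment above,
   avoids triggering that realignment just for it.  */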
24482 /* Find a location for the static chain incoming to a nested function.
24483 This is a register, unless all free registers are used by arguments. */
24486 ix86_static_chain (const_tree fndecl, bool incoming_p)
24490 if (!DECL_STATIC_CHAIN (fndecl))
24495 /* We always use R10 in 64-bit mode. */
24503 /* By default in 32-bit mode we use ECX to pass the static chain. */
24506 fntype = TREE_TYPE (fndecl);
24507 ccvt = ix86_get_callcvt (fntype);
24508 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
24510 /* Fastcall functions use ecx/edx for arguments, which leaves
24511 us with EAX for the static chain.
24512 Thiscall functions use ecx for arguments, which also
24513 leaves us with EAX for the static chain. */
24516 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
24518 /* Thiscall functions use ecx for arguments, which leaves
24519 us with EAX and EDX for the static chain.
24520 We are using for abi-compatibility EAX. */
24523 else if (ix86_function_regparm (fntype, fndecl) == 3)
24525 /* For regparm 3, we have no free call-clobbered registers in
24526 which to store the static chain. In order to implement this,
24527 we have the trampoline push the static chain to the stack.
24528 However, we can't push a value below the return address when
24529 we call the nested function directly, so we have to use an
24530 alternate entry point. For this we use ESI, and have the
24531 alternate entry point push ESI, so that things appear the
24532 same once we're executing the nested function. */
24535 if (fndecl == current_function_decl)
24536 ix86_static_chain_on_stack = true;
24537 return gen_frame_mem (SImode,
24538 plus_constant (arg_pointer_rtx, -8));
24544 return gen_rtx_REG (Pmode, regno);
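/* Usage sketch (GNU C nested functions, illustrative only):

     int outer (int x)
     {
       int inner (int y) { return x + y; }
       int (*fp) (int) = inner;
       return fp (1);
     }

   Taking the address of INNER forces a trampoline; the static chain
   location selected above (R10 in 64-bit mode, normally ECX in 32-bit
   mode, EAX for fastcall/thiscall, or the argp-8 stack slot for regparm
   3) is how INNER finds OUTER's frame.  */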
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load the address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but the kernel does not use trampolines at
	 the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);

	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);

	  offset += 10;
	}

      /* Load the static chain using movabs to r10.  Use the
	 shorter movl instead of movabs for x32.  */
      if (TARGET_X32)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute the offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (XEXP (m_tramp, 0),
					  offset - (MEM_P (chain) ? 1 : 0)),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
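/* For reference (explanatory note derived from the opcodes stored above),
   the 64-bit trampoline bytes decode as follows, little-endian; 41 BB and
   41 BA are the shorter movl forms used for zero-extendable addresses and
   for the x32 static chain:

	49 BB <fnaddr64>	movabs $fnaddr, %r11
	49 BA <chain64>		movabs $chain, %r10
	49 FF E3		jmpq   *%r11
	90			nop	(pads the final SImode store)

   The 32-bit variant is "movl $chain, %eax/%ecx" (B8/B9) or, for regparm
   3, "pushl $chain" (68), followed by "jmp rel32" (E9) whose displacement
   is biased by the 1-byte push at the alternate entry point when the
   chain lives on the stack.  */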
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
	quals = TYPE_UNQUALIFIED;
      else
	quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
	itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
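/* Illustrative walk-through (explanatory, not in the original): the first
   request for a vector code such as IX86_BT_V4SF (one of the codes
   generated into i386-builtin-types.inc) recursively builds the float
   element type, looks up V4SFmode in ix86_builtin_type_vect_mode, and
   caches the result of build_vector_type_for_mode; every later request
   is a plain table hit.  */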
/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
	{
	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
	  args = tree_cons (NULL, atype, args);
	}

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
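/* Example (type codes from i386-builtin-types.def): V4SF_FTYPE_V4SF_V4SF
   builds "v4sf f (v4sf, v4sf)".  Note the loop walks the argument list
   backwards, consing onto void_list_node, which leaves the resulting
   TREE_LIST in the original left-to-right order.  */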
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTI64,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,
  IX86_BUILTIN_PAUSE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  /* SSE3 */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3 */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1 */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,

  IX86_BUILTIN_FLOORPD,
  IX86_BUILTIN_CEILPD,
  IX86_BUILTIN_TRUNCPD,
  IX86_BUILTIN_RINTPD,
  IX86_BUILTIN_ROUNDPD_AZ,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,

  IX86_BUILTIN_FLOORPS,
  IX86_BUILTIN_CEILPS,
  IX86_BUILTIN_TRUNCPS,
  IX86_BUILTIN_RINTPS,
  IX86_BUILTIN_ROUNDPS_AZ,

  IX86_BUILTIN_FLOORPS_SFIX,
  IX86_BUILTIN_CEILPS_SFIX,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,
  IX86_BUILTIN_VEC_PACK_SFIX256,

  /* SSE4.2 */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_ROUNDPD_AZ256,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,

  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,
  IX86_BUILTIN_ROUNDPS_AZ256,

  IX86_BUILTIN_FLOORPS_SFIX256,
  IX86_BUILTIN_CEILPS_SFIX256,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* AVX2 */
  IX86_BUILTIN_MPSADBW256,
  IX86_BUILTIN_PABSB256,
  IX86_BUILTIN_PABSW256,
  IX86_BUILTIN_PABSD256,
  IX86_BUILTIN_PACKSSDW256,
  IX86_BUILTIN_PACKSSWB256,
  IX86_BUILTIN_PACKUSDW256,
  IX86_BUILTIN_PACKUSWB256,
  IX86_BUILTIN_PADDB256,
  IX86_BUILTIN_PADDW256,
  IX86_BUILTIN_PADDD256,
  IX86_BUILTIN_PADDQ256,
  IX86_BUILTIN_PADDSB256,
  IX86_BUILTIN_PADDSW256,
  IX86_BUILTIN_PADDUSB256,
  IX86_BUILTIN_PADDUSW256,
  IX86_BUILTIN_PALIGNR256,
  IX86_BUILTIN_AND256I,
  IX86_BUILTIN_ANDNOT256I,
  IX86_BUILTIN_PAVGB256,
  IX86_BUILTIN_PAVGW256,
  IX86_BUILTIN_PBLENDVB256,
  IX86_BUILTIN_PBLENDVW256,
  IX86_BUILTIN_PCMPEQB256,
  IX86_BUILTIN_PCMPEQW256,
  IX86_BUILTIN_PCMPEQD256,
  IX86_BUILTIN_PCMPEQQ256,
  IX86_BUILTIN_PCMPGTB256,
  IX86_BUILTIN_PCMPGTW256,
  IX86_BUILTIN_PCMPGTD256,
  IX86_BUILTIN_PCMPGTQ256,
  IX86_BUILTIN_PHADDW256,
  IX86_BUILTIN_PHADDD256,
  IX86_BUILTIN_PHADDSW256,
  IX86_BUILTIN_PHSUBW256,
  IX86_BUILTIN_PHSUBD256,
  IX86_BUILTIN_PHSUBSW256,
  IX86_BUILTIN_PMADDUBSW256,
  IX86_BUILTIN_PMADDWD256,
  IX86_BUILTIN_PMAXSB256,
  IX86_BUILTIN_PMAXSW256,
  IX86_BUILTIN_PMAXSD256,
  IX86_BUILTIN_PMAXUB256,
  IX86_BUILTIN_PMAXUW256,
  IX86_BUILTIN_PMAXUD256,
  IX86_BUILTIN_PMINSB256,
  IX86_BUILTIN_PMINSW256,
  IX86_BUILTIN_PMINSD256,
  IX86_BUILTIN_PMINUB256,
  IX86_BUILTIN_PMINUW256,
  IX86_BUILTIN_PMINUD256,
  IX86_BUILTIN_PMOVMSKB256,
  IX86_BUILTIN_PMOVSXBW256,
  IX86_BUILTIN_PMOVSXBD256,
  IX86_BUILTIN_PMOVSXBQ256,
  IX86_BUILTIN_PMOVSXWD256,
  IX86_BUILTIN_PMOVSXWQ256,
  IX86_BUILTIN_PMOVSXDQ256,
  IX86_BUILTIN_PMOVZXBW256,
  IX86_BUILTIN_PMOVZXBD256,
  IX86_BUILTIN_PMOVZXBQ256,
  IX86_BUILTIN_PMOVZXWD256,
  IX86_BUILTIN_PMOVZXWQ256,
  IX86_BUILTIN_PMOVZXDQ256,
  IX86_BUILTIN_PMULDQ256,
  IX86_BUILTIN_PMULHRSW256,
  IX86_BUILTIN_PMULHUW256,
  IX86_BUILTIN_PMULHW256,
  IX86_BUILTIN_PMULLW256,
  IX86_BUILTIN_PMULLD256,
  IX86_BUILTIN_PMULUDQ256,
  IX86_BUILTIN_POR256,
  IX86_BUILTIN_PSADBW256,
  IX86_BUILTIN_PSHUFB256,
  IX86_BUILTIN_PSHUFD256,
  IX86_BUILTIN_PSHUFHW256,
  IX86_BUILTIN_PSHUFLW256,
  IX86_BUILTIN_PSIGNB256,
  IX86_BUILTIN_PSIGNW256,
  IX86_BUILTIN_PSIGND256,
  IX86_BUILTIN_PSLLDQI256,
  IX86_BUILTIN_PSLLWI256,
  IX86_BUILTIN_PSLLW256,
  IX86_BUILTIN_PSLLDI256,
  IX86_BUILTIN_PSLLD256,
  IX86_BUILTIN_PSLLQI256,
  IX86_BUILTIN_PSLLQ256,
  IX86_BUILTIN_PSRAWI256,
  IX86_BUILTIN_PSRAW256,
  IX86_BUILTIN_PSRADI256,
  IX86_BUILTIN_PSRAD256,
  IX86_BUILTIN_PSRLDQI256,
  IX86_BUILTIN_PSRLWI256,
  IX86_BUILTIN_PSRLW256,
  IX86_BUILTIN_PSRLDI256,
  IX86_BUILTIN_PSRLD256,
  IX86_BUILTIN_PSRLQI256,
  IX86_BUILTIN_PSRLQ256,
  IX86_BUILTIN_PSUBB256,
  IX86_BUILTIN_PSUBW256,
  IX86_BUILTIN_PSUBD256,
  IX86_BUILTIN_PSUBQ256,
  IX86_BUILTIN_PSUBSB256,
  IX86_BUILTIN_PSUBSW256,
  IX86_BUILTIN_PSUBUSB256,
  IX86_BUILTIN_PSUBUSW256,
  IX86_BUILTIN_PUNPCKHBW256,
  IX86_BUILTIN_PUNPCKHWD256,
  IX86_BUILTIN_PUNPCKHDQ256,
  IX86_BUILTIN_PUNPCKHQDQ256,
  IX86_BUILTIN_PUNPCKLBW256,
  IX86_BUILTIN_PUNPCKLWD256,
  IX86_BUILTIN_PUNPCKLDQ256,
  IX86_BUILTIN_PUNPCKLQDQ256,
  IX86_BUILTIN_PXOR256,
  IX86_BUILTIN_MOVNTDQA256,
  IX86_BUILTIN_VBROADCASTSS_PS,
  IX86_BUILTIN_VBROADCASTSS_PS256,
  IX86_BUILTIN_VBROADCASTSD_PD256,
  IX86_BUILTIN_VBROADCASTSI256,
  IX86_BUILTIN_PBLENDD256,
  IX86_BUILTIN_PBLENDD128,
  IX86_BUILTIN_PBROADCASTB256,
  IX86_BUILTIN_PBROADCASTW256,
  IX86_BUILTIN_PBROADCASTD256,
  IX86_BUILTIN_PBROADCASTQ256,
  IX86_BUILTIN_PBROADCASTB128,
  IX86_BUILTIN_PBROADCASTW128,
  IX86_BUILTIN_PBROADCASTD128,
  IX86_BUILTIN_PBROADCASTQ128,
  IX86_BUILTIN_VPERMVARSI256,
  IX86_BUILTIN_VPERMDF256,
  IX86_BUILTIN_VPERMVARSF256,
  IX86_BUILTIN_VPERMDI256,
  IX86_BUILTIN_VPERMTI256,
  IX86_BUILTIN_VEXTRACT128I256,
  IX86_BUILTIN_VINSERT128I256,
  IX86_BUILTIN_MASKLOADD,
  IX86_BUILTIN_MASKLOADQ,
  IX86_BUILTIN_MASKLOADD256,
  IX86_BUILTIN_MASKLOADQ256,
  IX86_BUILTIN_MASKSTORED,
  IX86_BUILTIN_MASKSTOREQ,
  IX86_BUILTIN_MASKSTORED256,
  IX86_BUILTIN_MASKSTOREQ256,
  IX86_BUILTIN_PSLLVV4DI,
  IX86_BUILTIN_PSLLVV2DI,
  IX86_BUILTIN_PSLLVV8SI,
  IX86_BUILTIN_PSLLVV4SI,
  IX86_BUILTIN_PSRAVV8SI,
  IX86_BUILTIN_PSRAVV4SI,
  IX86_BUILTIN_PSRLVV4DI,
  IX86_BUILTIN_PSRLVV2DI,
  IX86_BUILTIN_PSRLVV8SI,
  IX86_BUILTIN_PSRLVV4SI,

  IX86_BUILTIN_GATHERSIV2DF,
  IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF,
  IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF,
  IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF,
  IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI,
  IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI,
  IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI,
  IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI,
  IX86_BUILTIN_GATHERDIV8SI,

  /* Alternate 4-element gather for the vectorizer where
     all operands are 32-byte wide.  */
  IX86_BUILTIN_GATHERALTSIV4DF,
  IX86_BUILTIN_GATHERALTDIV8SF,
  IX86_BUILTIN_GATHERALTSIV4DI,
  IX86_BUILTIN_GATHERALTDIV8SI,

  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  /* FMA4 instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  /* FMA3 instructions.  */
  IX86_BUILTIN_VFMADDSS3,
  IX86_BUILTIN_VFMADDSD3,

  /* XOP instructions.  */
  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_CLZS,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,
  IX86_BUILTIN_CTZS,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* BMI2 instructions.  */
  IX86_BUILTIN_BZHI32,
  IX86_BUILTIN_BZHI64,
  IX86_BUILTIN_PDEP32,
  IX86_BUILTIN_PDEP64,
  IX86_BUILTIN_PEXT32,
  IX86_BUILTIN_PEXT64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for Darwin.  */
  IX86_BUILTIN_CFSTRING,

  IX86_BUILTIN_MAX
};
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;		/* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  HOST_WIDE_INT isa;		/* isa_flags this builtin is defined for */
  bool const_p;			/* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
	     enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
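/* Typical call later in this file (sketch, for orientation):

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
		  VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);

   With a front end whose builtin_function hook is the ext_scope variant
   (e.g. C), the decl is created eagerly even when -msse is off; otherwise
   it is parked in ix86_builtins_isa until ix86_add_new_builtins below
   sees the ISA turned on.  */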
26018 /* Like def_builtin, but also marks the function decl "const". */
26021 def_builtin_const (HOST_WIDE_INT mask, const char *name,
26022 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
26024 tree decl = def_builtin (mask, name, tcode, code);
26026 TREE_READONLY (decl) = 1;
26028 ix86_builtins_isa[(int) code].const_p = true;
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
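/* A parked builtin typically becomes visible through the target
   attribute, e.g. (illustrative)

     __attribute__ ((target ("avx2")))
     void f (void);

   where switching the ISA flags for F reaches ix86_add_new_builtins with
   the new mask; the still-missing decls are declared at external scope so
   that a mid-function declaration cannot create scoping problems.  */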
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
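/* Explanatory note (not in the original): comis/ucomis set only ZF/CF/PF,
   and a NaN operand sets all three, so the rtx codes above encode what the
   flags really test.  __builtin_ia32_comieq is UNEQ because ZF=1 means
   equal or unordered, comilt is UNLT because CF=1 means below or
   unordered, while comigt/comige can use plain GT/GE since ZF=CF=0 already
   implies an ordered result; the expander uses these codes to pick the
   right setcc or to swap operands.  */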
26108 static const struct builtin_description bdesc_pcmpestr[] =
26111 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
26112 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
26113 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
26114 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
26115 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
26116 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
26117 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
26120 static const struct builtin_description bdesc_pcmpistr[] =
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
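
/* A note on how the tables below are consumed (a condensed sketch of
   the registration loop found elsewhere in this file; see
   ix86_init_mmx_sse_builtins):

     for (i = 0, d = bdesc_special_args;
          i < ARRAY_SIZE (bdesc_special_args);
          i++, d++)
       if (d->name)
         def_builtin (d->mask, d->name,
                      (enum ix86_builtin_func_type) d->flag, d->code);

   For these tables the ->flag field encodes the prototype, e.g.
   VOID_FTYPE_PFLOAT_V4SF is a builtin taking a float * and a
   V4SFmode vector and returning void.  */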

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};
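
/* bdesc_args below holds the pure, no-side-effect builtins; they are
   registered with def_builtin_const rather than def_builtin and are
   expanded through ix86_expand_args_builtin.  Illustrative use from
   C (roughly what <xmmintrin.h>'s _mm_add_ps has historically
   reduced to):

     __v4sf sum = __builtin_ia32_addps (x, y);

   The ->flag field again encodes the prototype; V4SF_FTYPE_V4SF_V4SF
   maps two V4SFmode operands to a V4SFmode result.  */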

/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
26565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
26566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
26568 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
26571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
26572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
26574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
26576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
26577 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
26578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
26579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
26581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
26582 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
26583 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
26584 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
26585 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
26586 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
26587 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
26589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
26590 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
26591 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
26592 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
26593 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
26594 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
26595 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
26597 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
26598 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
26599 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
26600 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
26602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
26603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
26604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
26606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
26608 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
26609 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
26611 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
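  /* Editor's note: the *_INT_CONVERT signatures on the palignr rows
     mean the vector operands are reinterpreted in the mode of the insn
     pattern (TImode/DImode) before expansion, and the immediate counts
     bits rather than bytes.  A hedged illustration of how the 128-bit
     variant is reached from user code; this wrapper shape matches the
     one in GCC's tmmintrin.h:

       #define _mm_alignr_epi8(X, Y, N)                               \
         ((__m128i) __builtin_ia32_palignr128 ((__v2di)(__m128i)(X),  \
                                               (__v2di)(__m128i)(Y),  \
                                               (int)(N) * 8))

     i.e. the byte count N of the intrinsic is scaled to a bit count
     for the ssse3_palignrti pattern.  */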

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
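  /* Editor's note: in the entries above the fifth field is not always
     a genuine comparison.  For the ptest rows it selects the flags bit
     the expander reads back (EQ -> ZF set, LTU -> CF set, GTU -> both
     clear), and for the floor/ceil/trunc/rint rows it carries the
     rounding-mode immediate (ROUND_FLOOR, ROUND_CEIL, ...) baked into
     the shared sse4_1_round* pattern.  The rounding builtins exist
     mainly so the vectorizer can open-code libm calls; a hedged
     sketch of the intent:

       double x[2], y[2];
       for (int i = 0; i < 2; i++)
         y[i] = __builtin_floor (x[i]);
       // With -msse4.1 -fno-trapping-math this loop can vectorize to
       // a single roundpd using the ROUND_FLOOR immediate recorded in
       // this table.
  */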

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
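  /* Editor's note: the AES and PCLMUL rows deliberately carry a null
     name: they sit in this table under OPTION_MASK_ISA_SSE2 for
     expansion bookkeeping, while the user-visible names are registered
     separately so the extra ISA bit (-maes / -mpclmul) is checked too.
     The registration plausibly looks along these lines (sketch):

       def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
                          V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  */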

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
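  /* Editor's note: as with the 128-bit ptest rows, the EQ/LTU/GTU
     codes on the vtest and ptest entries above select the condition
     read back from EFLAGS (ZF set, CF set, or both clear) rather than
     describing a vector comparison.  User code reaches them through
     thin header wrappers; the shape below matches GCC's avxintrin.h:

       extern __inline int
       __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
       _mm256_testz_si256 (__m256i __M, __m256i __V)
       {
         return __builtin_ia32_ptestz256 ((__v4di)__M, (__v4di)__V);
       }
  */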

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
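  /* Editor's note: the psllv/psrav/psrlv rows above are the AVX2
     per-element variable shifts; unlike the *_COUNT forms earlier in
     the table, each lane gets its own shift amount from the second
     vector operand.  Illustrative wrapper, matching the shape used in
     GCC's avx2intrin.h:

       extern __inline __m256i
       __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
       _mm256_sllv_epi32 (__m256i __X, __m256i __Y)
       {
         return (__m256i) __builtin_ia32_psllv8si ((__v8si)__X,
                                                   (__v8si)__Y);
       }
  */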

  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* BMI */
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* TBM */
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },

  /* BMI2 */
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
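
/* These aliases decode as RETURN_FTYPE_ARG1_ARG2_...; e.g. MULTI_ARG_3_SF
   is V4SF_FTYPE_V4SF_V4SF_V4SF, describing a builtin such as

     __m128 __builtin_ia32_vfmaddps (__m128 a, __m128 b, __m128 c);

   The _CMP and _TF variants additionally route the descriptor's
   comparison code (or a PCOM_FALSE/PCOM_TRUE selector) into the
   expander as an extra operand rather than as a call argument.  */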
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
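
/* Each row above is expanded by ix86_expand_multi_arg_builtin below; e.g.
   the IX86_BUILTIN_VPMACSSWW entry makes

     __m128i __builtin_ia32_vpmacssww (__m128i, __m128i, __m128i);

   available (xopintrin.h wraps it as _mm_maccs_epi16) and ties it to the
   xop_pmacssww insn pattern named in the descriptor.  */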
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because it
   is convenient.  Add casts to make the fields fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
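
/* The M64/M128/M256 suffixes encode the access width in bits, so each ISA
   level contributes one load/store/log family: MMX for 64-bit, SSE for
   128-bit, and AVX for 256-bit vector accesses.  */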
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
	}
    }
  return NULL_TREE;
}
/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
	}
    }
  return NULL_TREE;
}
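
/* Example: for a type such as

     typedef int v4si __attribute__ ((vector_size (16)));

   TYPE_SIZE is 128 bits, so both hooks hand back the SSE _ITM_*M128
   builtin registered in bdesc_tm above; scalar and oddly-sized types fall
   through and return NULL_TREE, and callers then presumably fall back to
   the generic non-vector TM accessors.  */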
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type, attrs, attrs_type;
	  enum built_in_function code = (enum built_in_function) d->code;

	  ftype = (enum ix86_builtin_func_type) d->flag;
	  type = ix86_get_builtin_func_type (ftype);

	  if (BUILTIN_TM_LOAD_P (code))
	    {
	      attrs = attrs_load;
	      attrs_type = attrs_type_load;
	    }
	  else if (BUILTIN_TM_STORE_P (code))
	    {
	      attrs = attrs_store;
	      attrs_type = attrs_type_store;
	    }
	  else
	    {
	      attrs = attrs_log;
	      attrs_type = attrs_type_log;
	    }
	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
				       /* The builtin without the prefix for
					  calling it directly.  */
				       d->name + strlen ("__builtin_"),
				       attrs);
	  /* add_builtin_function () will set the DECL_ATTRIBUTES; now
	     set the TYPE_ATTRIBUTES.  */
	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

	  set_builtin_decl (code, decl, false);
	}
    }
}
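
/* Each TM builtin is thus registered under its __builtin__ITM_* name with
   the unprefixed _ITM_* spelling as its call-time name, and it inherits
   both the DECL_ATTRIBUTES and TYPE_ATTRIBUTES of the corresponding scalar
   TM builtin, so e.g. the transaction-safety attributes of
   BUILT_IN_TM_LOAD_1 carry over to the vector _ITM_RM128.  */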
/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, to allow the user to compile particular
   modules with target-specific options that differ from the command-line
   options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* RDRND */
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);
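
  /* Usage sketch: the *_step builtins store a random value through the
     pointer and return nonzero on success, matching the rdrand
     carry-flag protocol, e.g.

       unsigned int r;
       while (!__builtin_ia32_rdrand32_step (&r))
         ;  // retry until the hardware delivers entropy

     immintrin.h exposes these as _rdrand16_step and friends.  */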
  /* AVX2 */
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
	       IX86_BUILTIN_GATHERSIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
	       IX86_BUILTIN_GATHERSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
	       IX86_BUILTIN_GATHERDIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
	       IX86_BUILTIN_GATHERDIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
	       IX86_BUILTIN_GATHERSIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
	       V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
	       IX86_BUILTIN_GATHERSIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
	       IX86_BUILTIN_GATHERSIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
	       IX86_BUILTIN_GATHERSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
	       IX86_BUILTIN_GATHERDIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
	       IX86_BUILTIN_GATHERDIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
	       IX86_BUILTIN_GATHERSIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
	       V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
	       IX86_BUILTIN_GATHERSIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
	       V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
	       IX86_BUILTIN_GATHERALTSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
	       V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
	       IX86_BUILTIN_GATHERALTDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
	       V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
	       IX86_BUILTIN_GATHERALTSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
	       V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
	       IX86_BUILTIN_GATHERALTDIV8SI);
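
  /* The gather signatures read as result_FTYPE_src_base_index_mask_scale;
     e.g. __builtin_ia32_gathersiv4df underlies the AVX2 intrinsic

       __m256d _mm256_mask_i32gather_pd (__m256d src, double const *base,
                                         __m128i idx, __m256d mask,
                                         const int scale);

     The gatheralt* variants (registered, oddly, with a stray trailing
     space in their names) appear to exist for the vectorizer's benefit,
     covering the cases where the index vector is a different width than
     the data vector.  */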
  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
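
  /* These map straight onto the generic vec_init/vec_extract/vec_set
     patterns; e.g. __builtin_ia32_vec_ext_v4sf is what xmmintrin.h uses
     for

       float _mm_cvtss_f32 (__m128 a);   // extract element 0

     and __builtin_ia32_vec_set_v8hi backs _mm_insert_epi16.  The element
     number must be an immediate, as the underlying patterns require.  */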
  /* Add FMA4 and XOP multi-arg builtin instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
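
/* These make the foreign-ABI va_list handling available by name, e.g.

     void f (int n, ...) __attribute__ ((ms_abi));
     void f (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       ...
       __builtin_ms_va_end (ap);
     }

   so 64-bit code can consume varargs across the SysV/MS ABI boundary.  */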
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
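
/* After this runs, both extended types are usable by name in user code:

     __float80  w = 1.0w;    (XFmode, x87 extended precision)
     __float128 q = 1.0q;    (TFmode, IEEE quad)

   and the awk-generated macro has registered every primitive type that
   the builtin descriptor tables refer to.  */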
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal calls if SSE2 isn't available, since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  if (TARGET_LP64)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}
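
/* A quick sketch of the TFmode pieces defined above:

     __float128 x = __builtin_infq ();
     __float128 y = __builtin_fabsq (x);

   Without SSE2 the fabsq/copysignq builtins simply become calls to the
   libgcc routines __fabstf2/__copysigntf3 named as their library
   fallbacks.  */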
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
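
/* The flow for a two-operand builtin routed here is: expand both call
   arguments, force them into registers whenever the insn pattern's
   operand predicates reject the expanded form, then emit one insn,
   roughly

     (set (reg:TMODE target) (OP:TMODE op0 op1))

   The SImode/TImode special case loads a 32-bit operand into a vector
   register (via sse2_loadd) so it can stand in for a TImode operand.  */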
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
	{
	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
	    {
	      enum insn_code new_icode = icode;
	      switch (icode)
		{
		case CODE_FOR_xop_vpermil2v2df3:
		case CODE_FOR_xop_vpermil2v4sf3:
		case CODE_FOR_xop_vpermil2v4df3:
		case CODE_FOR_xop_vpermil2v8sf3:
		  error ("the last argument must be a 2-bit immediate");
		  return gen_reg_rtx (tmode);
		case CODE_FOR_xop_rotlv2di3:
		  new_icode = CODE_FOR_rotlv2di3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv4si3:
		  new_icode = CODE_FOR_rotlv4si3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv8hi3:
		  new_icode = CODE_FOR_rotlv8hi3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv16qi3:
		  new_icode = CODE_FOR_rotlv16qi3;
		xop_rotl:
		  if (CONST_INT_P (op))
		    {
		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
		      op = GEN_INT (INTVAL (op) & mask);
		      gcc_checking_assert
			(insn_data[icode].operand[i + 1].predicate (op, mode));
		    }
		  else
		    {
		      gcc_checking_assert
			(nargs == 2
			 && insn_data[new_icode].operand[0].mode == tmode
			 && insn_data[new_icode].operand[1].mode == tmode
			 && insn_data[new_icode].operand[2].mode == mode
			 && insn_data[new_icode].operand[0].predicate
			    == insn_data[icode].operand[0].predicate
			 && insn_data[new_icode].operand[1].predicate
			    == insn_data[icode].operand[1].predicate);
		      icode = new_icode;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op,
			     args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
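
/* Worked example, under the descriptors above: __builtin_ia32_vpcomltb
   arrives with sub_code == LT and m_type == MULTI_ARG_2_QI_CMP, so
   comparison_p is set, nargs is 2, and the emitted pattern is roughly

     (set (reg:V16QI target)
	  (lt:V16QI (reg:V16QI op0) (reg:V16QI op1)))

   which the xop_maskcmpv16qi3 pattern matches as one vpcomltb insn.  */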
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
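
/* E.g. _mm_comilt_ss is __builtin_ia32_comilt: the comi pattern sets the
   flags register, and the code above materializes d->comparison (with
   operands pre-swapped when BUILTIN_DESC_SWAP_OPERANDS marks a test the
   hardware only has in the opposite direction) into the low byte of a
   fresh SImode pseudo, which is what the builtin finally returns.  */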
28310 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
28313 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
28317 tree arg0 = CALL_EXPR_ARG (exp, 0);
28318 rtx op1, op0 = expand_normal (arg0);
28319 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
28320 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
28322 if (optimize || target == 0
28323 || GET_MODE (target) != tmode
28324 || !insn_data[d->icode].operand[0].predicate (target, tmode))
28325 target = gen_reg_rtx (tmode);
28327 if (VECTOR_MODE_P (mode0))
28328 op0 = safe_vector_operand (op0, mode0);
28330 if ((optimize && !register_operand (op0, mode0))
28331 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
28332 op0 = copy_to_mode_reg (mode0, op0);
28334 op1 = GEN_INT (d->comparison);
  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
28344 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
28345 tree exp, rtx target)
28348 tree arg0 = CALL_EXPR_ARG (exp, 0);
28349 tree arg1 = CALL_EXPR_ARG (exp, 1);
28350 rtx op0 = expand_normal (arg0);
28351 rtx op1 = expand_normal (arg1);
28353 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
28354 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
28355 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
28357 if (optimize || target == 0
28358 || GET_MODE (target) != tmode
28359 || !insn_data[d->icode].operand[0].predicate (target, tmode))
28360 target = gen_reg_rtx (tmode);
28362 op0 = safe_vector_operand (op0, mode0);
28363 op1 = safe_vector_operand (op1, mode1);
28365 if ((optimize && !register_operand (op0, mode0))
28366 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
28367 op0 = copy_to_mode_reg (mode0, op0);
28368 if ((optimize && !register_operand (op1, mode1))
28369 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
28370 op1 = copy_to_mode_reg (mode1, op1);
28372 op2 = GEN_INT (d->comparison);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
28381 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
28388 tree arg0 = CALL_EXPR_ARG (exp, 0);
28389 tree arg1 = CALL_EXPR_ARG (exp, 1);
28390 rtx op0 = expand_normal (arg0);
28391 rtx op1 = expand_normal (arg1);
28392 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
28393 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
28394 enum rtx_code comparison = d->comparison;
28396 if (VECTOR_MODE_P (mode0))
28397 op0 = safe_vector_operand (op0, mode0);
28398 if (VECTOR_MODE_P (mode1))
28399 op1 = safe_vector_operand (op1, mode1);
28401 target = gen_reg_rtx (SImode);
28402 emit_move_insn (target, const0_rtx);
28403 target = gen_rtx_SUBREG (QImode, target, 0);
28405 if ((optimize && !register_operand (op0, mode0))
28406 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
28407 op0 = copy_to_mode_reg (mode0, op0);
28408 if ((optimize && !register_operand (op1, mode1))
28409 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
28410 op1 = copy_to_mode_reg (mode1, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
28425 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
28428 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
28429 tree exp, rtx target)
28432 tree arg0 = CALL_EXPR_ARG (exp, 0);
28433 tree arg1 = CALL_EXPR_ARG (exp, 1);
28434 tree arg2 = CALL_EXPR_ARG (exp, 2);
28435 tree arg3 = CALL_EXPR_ARG (exp, 3);
28436 tree arg4 = CALL_EXPR_ARG (exp, 4);
28437 rtx scratch0, scratch1;
28438 rtx op0 = expand_normal (arg0);
28439 rtx op1 = expand_normal (arg1);
28440 rtx op2 = expand_normal (arg2);
28441 rtx op3 = expand_normal (arg3);
28442 rtx op4 = expand_normal (arg4);
28443 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
28445 tmode0 = insn_data[d->icode].operand[0].mode;
28446 tmode1 = insn_data[d->icode].operand[1].mode;
28447 modev2 = insn_data[d->icode].operand[2].mode;
28448 modei3 = insn_data[d->icode].operand[3].mode;
28449 modev4 = insn_data[d->icode].operand[4].mode;
28450 modei5 = insn_data[d->icode].operand[5].mode;
28451 modeimm = insn_data[d->icode].operand[6].mode;
28453 if (VECTOR_MODE_P (modev2))
28454 op0 = safe_vector_operand (op0, modev2);
28455 if (VECTOR_MODE_P (modev4))
28456 op2 = safe_vector_operand (op2, modev4);
28458 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
28459 op0 = copy_to_mode_reg (modev2, op0);
28460 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
28461 op1 = copy_to_mode_reg (modei3, op1);
28462 if ((optimize && !register_operand (op2, modev4))
28463 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
28464 op2 = copy_to_mode_reg (modev4, op2);
28465 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
28466 op3 = copy_to_mode_reg (modei5, op3);
  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }
28474 if (d->code == IX86_BUILTIN_PCMPESTRI128)
28476 if (optimize || !target
28477 || GET_MODE (target) != tmode0
28478 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
28479 target = gen_reg_rtx (tmode0);
28481 scratch1 = gen_reg_rtx (tmode1);
28483 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
28485 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
28487 if (optimize || !target
28488 || GET_MODE (target) != tmode1
28489 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
28490 target = gen_reg_rtx (tmode1);
28492 scratch0 = gen_reg_rtx (tmode0);
28494 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
28498 gcc_assert (d->flag);
28500 scratch0 = gen_reg_rtx (tmode0);
28501 scratch1 = gen_reg_rtx (tmode1);
  pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
  if (! pat)
    return 0;
  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
28530 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
28533 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
28534 tree exp, rtx target)
28537 tree arg0 = CALL_EXPR_ARG (exp, 0);
28538 tree arg1 = CALL_EXPR_ARG (exp, 1);
28539 tree arg2 = CALL_EXPR_ARG (exp, 2);
28540 rtx scratch0, scratch1;
28541 rtx op0 = expand_normal (arg0);
28542 rtx op1 = expand_normal (arg1);
28543 rtx op2 = expand_normal (arg2);
28544 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
28546 tmode0 = insn_data[d->icode].operand[0].mode;
28547 tmode1 = insn_data[d->icode].operand[1].mode;
28548 modev2 = insn_data[d->icode].operand[2].mode;
28549 modev3 = insn_data[d->icode].operand[3].mode;
28550 modeimm = insn_data[d->icode].operand[4].mode;
28552 if (VECTOR_MODE_P (modev2))
28553 op0 = safe_vector_operand (op0, modev2);
28554 if (VECTOR_MODE_P (modev3))
28555 op1 = safe_vector_operand (op1, modev3);
28557 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
28558 op0 = copy_to_mode_reg (modev2, op0);
28559 if ((optimize && !register_operand (op1, modev3))
28560 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
28561 op1 = copy_to_mode_reg (modev3, op1);
  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }
28569 if (d->code == IX86_BUILTIN_PCMPISTRI128)
28571 if (optimize || !target
28572 || GET_MODE (target) != tmode0
28573 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
28574 target = gen_reg_rtx (tmode0);
28576 scratch1 = gen_reg_rtx (tmode1);
28578 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
28580 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
28582 if (optimize || !target
28583 || GET_MODE (target) != tmode1
28584 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
28585 target = gen_reg_rtx (tmode1);
28587 scratch0 = gen_reg_rtx (tmode0);
28589 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
28593 gcc_assert (d->flag);
28595 scratch0 = gen_reg_rtx (tmode0);
28596 scratch1 = gen_reg_rtx (tmode1);
  pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
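/* Illustrative user-level forms that reach this routine:

     int     i = _mm_cmpistri (a, b, 0);     index result in ECX
     __m128i m = _mm_cmpistrm (a, b, 0);     mask result in XMM0
     int     z = _mm_cmpistrz (a, b, 0);     EFLAGS bit

   The index and mask variants select TARGET as the corresponding
   output; the flag variants take the two-scratch path above and read
   the requested EFLAGS bit via the CC mode stored in d->flag.  */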
/* Subroutine of ix86_expand_builtin to take care of insns with
   a variable number of operands.  */
28628 ix86_expand_args_builtin (const struct builtin_description *d,
28629 tree exp, rtx target)
28631 rtx pat, real_target;
28632 unsigned int i, nargs;
28633 unsigned int nargs_constant = 0;
28634 int num_memory = 0;
28638 enum machine_mode mode;
28640 bool last_arg_count = false;
28641 enum insn_code icode = d->icode;
28642 const struct insn_data_d *insn_p = &insn_data[icode];
28643 enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;
28648 switch ((enum ix86_builtin_func_type) d->flag)
28650 case V2DF_FTYPE_V2DF_ROUND:
28651 case V4DF_FTYPE_V4DF_ROUND:
28652 case V4SF_FTYPE_V4SF_ROUND:
28653 case V8SF_FTYPE_V8SF_ROUND:
28654 case V4SI_FTYPE_V4SF_ROUND:
28655 case V8SI_FTYPE_V8SF_ROUND:
28656 return ix86_expand_sse_round (d, exp, target);
28657 case V4SI_FTYPE_V2DF_V2DF_ROUND:
28658 case V8SI_FTYPE_V4DF_V4DF_ROUND:
28659 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
28660 case INT_FTYPE_V8SF_V8SF_PTEST:
28661 case INT_FTYPE_V4DI_V4DI_PTEST:
28662 case INT_FTYPE_V4DF_V4DF_PTEST:
28663 case INT_FTYPE_V4SF_V4SF_PTEST:
28664 case INT_FTYPE_V2DI_V2DI_PTEST:
28665 case INT_FTYPE_V2DF_V2DF_PTEST:
28666 return ix86_expand_sse_ptest (d, exp, target);
28667 case FLOAT128_FTYPE_FLOAT128:
28668 case FLOAT_FTYPE_FLOAT:
28669 case INT_FTYPE_INT:
28670 case UINT64_FTYPE_INT:
28671 case UINT16_FTYPE_UINT16:
28672 case INT64_FTYPE_INT64:
28673 case INT64_FTYPE_V4SF:
28674 case INT64_FTYPE_V2DF:
28675 case INT_FTYPE_V16QI:
28676 case INT_FTYPE_V8QI:
28677 case INT_FTYPE_V8SF:
28678 case INT_FTYPE_V4DF:
28679 case INT_FTYPE_V4SF:
28680 case INT_FTYPE_V2DF:
28681 case INT_FTYPE_V32QI:
28682 case V16QI_FTYPE_V16QI:
28683 case V8SI_FTYPE_V8SF:
28684 case V8SI_FTYPE_V4SI:
28685 case V8HI_FTYPE_V8HI:
28686 case V8HI_FTYPE_V16QI:
28687 case V8QI_FTYPE_V8QI:
28688 case V8SF_FTYPE_V8SF:
28689 case V8SF_FTYPE_V8SI:
28690 case V8SF_FTYPE_V4SF:
28691 case V8SF_FTYPE_V8HI:
28692 case V4SI_FTYPE_V4SI:
28693 case V4SI_FTYPE_V16QI:
28694 case V4SI_FTYPE_V4SF:
28695 case V4SI_FTYPE_V8SI:
28696 case V4SI_FTYPE_V8HI:
28697 case V4SI_FTYPE_V4DF:
28698 case V4SI_FTYPE_V2DF:
28699 case V4HI_FTYPE_V4HI:
28700 case V4DF_FTYPE_V4DF:
28701 case V4DF_FTYPE_V4SI:
28702 case V4DF_FTYPE_V4SF:
28703 case V4DF_FTYPE_V2DF:
28704 case V4SF_FTYPE_V4SF:
28705 case V4SF_FTYPE_V4SI:
28706 case V4SF_FTYPE_V8SF:
28707 case V4SF_FTYPE_V4DF:
28708 case V4SF_FTYPE_V8HI:
28709 case V4SF_FTYPE_V2DF:
28710 case V2DI_FTYPE_V2DI:
28711 case V2DI_FTYPE_V16QI:
28712 case V2DI_FTYPE_V8HI:
28713 case V2DI_FTYPE_V4SI:
28714 case V2DF_FTYPE_V2DF:
28715 case V2DF_FTYPE_V4SI:
28716 case V2DF_FTYPE_V4DF:
28717 case V2DF_FTYPE_V4SF:
28718 case V2DF_FTYPE_V2SI:
28719 case V2SI_FTYPE_V2SI:
28720 case V2SI_FTYPE_V4SF:
28721 case V2SI_FTYPE_V2SF:
28722 case V2SI_FTYPE_V2DF:
28723 case V2SF_FTYPE_V2SF:
28724 case V2SF_FTYPE_V2SI:
28725 case V32QI_FTYPE_V32QI:
28726 case V32QI_FTYPE_V16QI:
28727 case V16HI_FTYPE_V16HI:
28728 case V16HI_FTYPE_V8HI:
28729 case V8SI_FTYPE_V8SI:
28730 case V16HI_FTYPE_V16QI:
28731 case V8SI_FTYPE_V16QI:
28732 case V4DI_FTYPE_V16QI:
28733 case V8SI_FTYPE_V8HI:
28734 case V4DI_FTYPE_V8HI:
28735 case V4DI_FTYPE_V4SI:
    case V4DI_FTYPE_V2DI:
      nargs = 1;
      break;
28739 case V4SF_FTYPE_V4SF_VEC_MERGE:
28740 case V2DF_FTYPE_V2DF_VEC_MERGE:
28741 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
28742 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
28743 case V16QI_FTYPE_V16QI_V16QI:
28744 case V16QI_FTYPE_V8HI_V8HI:
28745 case V8QI_FTYPE_V8QI_V8QI:
28746 case V8QI_FTYPE_V4HI_V4HI:
28747 case V8HI_FTYPE_V8HI_V8HI:
28748 case V8HI_FTYPE_V16QI_V16QI:
28749 case V8HI_FTYPE_V4SI_V4SI:
28750 case V8SF_FTYPE_V8SF_V8SF:
28751 case V8SF_FTYPE_V8SF_V8SI:
28752 case V4SI_FTYPE_V4SI_V4SI:
28753 case V4SI_FTYPE_V8HI_V8HI:
28754 case V4SI_FTYPE_V4SF_V4SF:
28755 case V4SI_FTYPE_V2DF_V2DF:
28756 case V4HI_FTYPE_V4HI_V4HI:
28757 case V4HI_FTYPE_V8QI_V8QI:
28758 case V4HI_FTYPE_V2SI_V2SI:
28759 case V4DF_FTYPE_V4DF_V4DF:
28760 case V4DF_FTYPE_V4DF_V4DI:
28761 case V4SF_FTYPE_V4SF_V4SF:
28762 case V4SF_FTYPE_V4SF_V4SI:
28763 case V4SF_FTYPE_V4SF_V2SI:
28764 case V4SF_FTYPE_V4SF_V2DF:
28765 case V4SF_FTYPE_V4SF_DI:
28766 case V4SF_FTYPE_V4SF_SI:
28767 case V2DI_FTYPE_V2DI_V2DI:
28768 case V2DI_FTYPE_V16QI_V16QI:
28769 case V2DI_FTYPE_V4SI_V4SI:
28770 case V2DI_FTYPE_V2DI_V16QI:
28771 case V2DI_FTYPE_V2DF_V2DF:
28772 case V2SI_FTYPE_V2SI_V2SI:
28773 case V2SI_FTYPE_V4HI_V4HI:
28774 case V2SI_FTYPE_V2SF_V2SF:
28775 case V2DF_FTYPE_V2DF_V2DF:
28776 case V2DF_FTYPE_V2DF_V4SF:
28777 case V2DF_FTYPE_V2DF_V2DI:
28778 case V2DF_FTYPE_V2DF_DI:
28779 case V2DF_FTYPE_V2DF_SI:
28780 case V2SF_FTYPE_V2SF_V2SF:
28781 case V1DI_FTYPE_V1DI_V1DI:
28782 case V1DI_FTYPE_V8QI_V8QI:
28783 case V1DI_FTYPE_V2SI_V2SI:
28784 case V32QI_FTYPE_V16HI_V16HI:
28785 case V16HI_FTYPE_V8SI_V8SI:
28786 case V32QI_FTYPE_V32QI_V32QI:
28787 case V16HI_FTYPE_V32QI_V32QI:
28788 case V16HI_FTYPE_V16HI_V16HI:
28789 case V8SI_FTYPE_V4DF_V4DF:
28790 case V8SI_FTYPE_V8SI_V8SI:
28791 case V8SI_FTYPE_V16HI_V16HI:
28792 case V4DI_FTYPE_V4DI_V4DI:
28793 case V4DI_FTYPE_V8SI_V8SI:
      if (comparison == UNKNOWN)
	return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
28798 case V4SF_FTYPE_V4SF_V4SF_SWAP:
28799 case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
28804 case V16HI_FTYPE_V16HI_V8HI_COUNT:
28805 case V16HI_FTYPE_V16HI_SI_COUNT:
28806 case V8SI_FTYPE_V8SI_V4SI_COUNT:
28807 case V8SI_FTYPE_V8SI_SI_COUNT:
28808 case V4DI_FTYPE_V4DI_V2DI_COUNT:
28809 case V4DI_FTYPE_V4DI_INT_COUNT:
28810 case V8HI_FTYPE_V8HI_V8HI_COUNT:
28811 case V8HI_FTYPE_V8HI_SI_COUNT:
28812 case V4SI_FTYPE_V4SI_V4SI_COUNT:
28813 case V4SI_FTYPE_V4SI_SI_COUNT:
28814 case V4HI_FTYPE_V4HI_V4HI_COUNT:
28815 case V4HI_FTYPE_V4HI_SI_COUNT:
28816 case V2DI_FTYPE_V2DI_V2DI_COUNT:
28817 case V2DI_FTYPE_V2DI_SI_COUNT:
28818 case V2SI_FTYPE_V2SI_V2SI_COUNT:
28819 case V2SI_FTYPE_V2SI_SI_COUNT:
28820 case V1DI_FTYPE_V1DI_V1DI_COUNT:
28821 case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
28825 case UINT64_FTYPE_UINT64_UINT64:
28826 case UINT_FTYPE_UINT_UINT:
28827 case UINT_FTYPE_UINT_USHORT:
28828 case UINT_FTYPE_UINT_UCHAR:
28829 case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_INT_CONVERT:
      nargs = 2;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
28843 case V8HI_FTYPE_V8HI_INT:
28844 case V8HI_FTYPE_V8SF_INT:
28845 case V8HI_FTYPE_V4SF_INT:
28846 case V8SF_FTYPE_V8SF_INT:
28847 case V4SI_FTYPE_V4SI_INT:
28848 case V4SI_FTYPE_V8SI_INT:
28849 case V4HI_FTYPE_V4HI_INT:
28850 case V4DF_FTYPE_V4DF_INT:
28851 case V4SF_FTYPE_V4SF_INT:
28852 case V4SF_FTYPE_V8SF_INT:
28853 case V2DI_FTYPE_V2DI_INT:
28854 case V2DF_FTYPE_V2DF_INT:
28855 case V2DF_FTYPE_V4DF_INT:
28856 case V16HI_FTYPE_V16HI_INT:
28857 case V8SI_FTYPE_V8SI_INT:
28858 case V4DI_FTYPE_V4DI_INT:
28859 case V2DI_FTYPE_V4DI_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
28863 case V16QI_FTYPE_V16QI_V16QI_V16QI:
28864 case V8SF_FTYPE_V8SF_V8SF_V8SF:
28865 case V4DF_FTYPE_V4DF_V4DF_V4DF:
28866 case V4SF_FTYPE_V4SF_V4SF_V4SF:
28867 case V2DF_FTYPE_V2DF_V2DF_V2DF:
    case V32QI_FTYPE_V32QI_V32QI_V32QI:
      nargs = 3;
      break;
28871 case V32QI_FTYPE_V32QI_V32QI_INT:
28872 case V16HI_FTYPE_V16HI_V16HI_INT:
28873 case V16QI_FTYPE_V16QI_V16QI_INT:
28874 case V4DI_FTYPE_V4DI_V4DI_INT:
28875 case V8HI_FTYPE_V8HI_V8HI_INT:
28876 case V8SI_FTYPE_V8SI_V8SI_INT:
28877 case V8SI_FTYPE_V8SI_V4SI_INT:
28878 case V8SF_FTYPE_V8SF_V8SF_INT:
28879 case V8SF_FTYPE_V8SF_V4SF_INT:
28880 case V4SI_FTYPE_V4SI_V4SI_INT:
28881 case V4DF_FTYPE_V4DF_V4DF_INT:
28882 case V4DF_FTYPE_V4DF_V2DF_INT:
28883 case V4SF_FTYPE_V4SF_V4SF_INT:
28884 case V2DI_FTYPE_V2DI_V2DI_INT:
28885 case V4DI_FTYPE_V4DI_V2DI_INT:
28886 case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
      nargs = 3;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = V2DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
28909 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
28910 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
28911 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
28912 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));
  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }
  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }
28947 for (i = 0; i < nargs; i++)
28949 tree arg = CALL_EXPR_ARG (exp, i);
28950 rtx op = expand_normal (arg);
28951 enum machine_mode mode = insn_p->operand[i + 1].mode;
28952 bool match = insn_p->operand[i + 1].predicate (op, mode);
28954 if (last_arg_count && (i + 1) == nargs)
	  /* SIMD shift insns take either an 8-bit immediate or a
	     register as the count.  But the builtin functions take an
	     int, so if the count doesn't match, we put it in a
	     register.  */
	  if (!match)
	    {
	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
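	  /* Illustrative user-level example (N is a hypothetical
	     variable):

	       __m128i x = _mm_set1_epi32 (1);
	       __m128i a = _mm_slli_epi32 (x, 3);   constant count
	       __m128i b = _mm_slli_epi32 (x, N);   variable count

	     The literal 3 already satisfies the insn predicate and is
	     emitted as the immediate form; the variable count takes
	     the subreg-and-copy path above and uses the register form
	     of the shift.  */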
28966 else if ((nargs - i) <= nargs_constant)
28971 case CODE_FOR_avx2_inserti128:
28972 case CODE_FOR_avx2_extracti128:
		    error ("the last argument must be a 1-bit immediate");
		    return const0_rtx;
28976 case CODE_FOR_sse4_1_roundsd:
28977 case CODE_FOR_sse4_1_roundss:
28979 case CODE_FOR_sse4_1_roundpd:
28980 case CODE_FOR_sse4_1_roundps:
28981 case CODE_FOR_avx_roundpd256:
28982 case CODE_FOR_avx_roundps256:
28984 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
28985 case CODE_FOR_sse4_1_roundps_sfix:
28986 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
28987 case CODE_FOR_avx_roundps_sfix256:
28989 case CODE_FOR_sse4_1_blendps:
28990 case CODE_FOR_avx_blendpd256:
28991 case CODE_FOR_avx_vpermilv4df:
28992 error ("the last argument must be a 4-bit immediate");
28995 case CODE_FOR_sse4_1_blendpd:
28996 case CODE_FOR_avx_vpermilv2df:
28997 case CODE_FOR_xop_vpermil2v2df3:
28998 case CODE_FOR_xop_vpermil2v4sf3:
28999 case CODE_FOR_xop_vpermil2v4df3:
29000 case CODE_FOR_xop_vpermil2v8sf3:
29001 error ("the last argument must be a 2-bit immediate");
29004 case CODE_FOR_avx_vextractf128v4df:
29005 case CODE_FOR_avx_vextractf128v8sf:
29006 case CODE_FOR_avx_vextractf128v8si:
29007 case CODE_FOR_avx_vinsertf128v4df:
29008 case CODE_FOR_avx_vinsertf128v8sf:
29009 case CODE_FOR_avx_vinsertf128v8si:
29010 error ("the last argument must be a 1-bit immediate");
29013 case CODE_FOR_avx_vmcmpv2df3:
29014 case CODE_FOR_avx_vmcmpv4sf3:
29015 case CODE_FOR_avx_cmpv2df3:
29016 case CODE_FOR_avx_cmpv4sf3:
29017 case CODE_FOR_avx_cmpv4df3:
29018 case CODE_FOR_avx_cmpv8sf3:
29019 error ("the last argument must be a 5-bit immediate");
29023 switch (nargs_constant)
29026 if ((nargs - i) == nargs_constant)
29028 error ("the next to last argument must be an 8-bit immediate");
29032 error ("the last argument must be an 8-bit immediate");
29035 gcc_unreachable ();
29042 if (VECTOR_MODE_P (mode))
29043 op = safe_vector_operand (op, mode);
	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
	    }
      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
/* Subroutine of ix86_expand_builtin to take care of special insns
   with a variable number of operands.  */
29097 ix86_expand_special_args_builtin (const struct builtin_description *d,
29098 tree exp, rtx target)
29102 unsigned int i, nargs, arg_adjust, memory;
29106 enum machine_mode mode;
29108 enum insn_code icode = d->icode;
29109 bool last_arg_constant = false;
29110 const struct insn_data_d *insn_p = &insn_data[icode];
29111 enum machine_mode tmode = insn_p->operand[0].mode;
29112 enum { load, store } klass;
29114 switch ((enum ix86_builtin_func_type) d->flag)
29116 case VOID_FTYPE_VOID:
29117 if (icode == CODE_FOR_avx_vzeroupper)
29118 target = GEN_INT (vzeroupper_intrinsic);
      emit_insn (GEN_FCN (icode) (target));
      return 0;
29121 case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
29127 case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
29133 case UINT64_FTYPE_PUNSIGNED:
29134 case V2DI_FTYPE_PV2DI:
29135 case V4DI_FTYPE_PV4DI:
29136 case V32QI_FTYPE_PCCHAR:
29137 case V16QI_FTYPE_PCCHAR:
29138 case V8SF_FTYPE_PCV4SF:
29139 case V8SF_FTYPE_PCFLOAT:
29140 case V4SF_FTYPE_PCFLOAT:
29141 case V4DF_FTYPE_PCV2DF:
29142 case V4DF_FTYPE_PCDOUBLE:
29143 case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
29149 case VOID_FTYPE_PV2SF_V4SF:
29150 case VOID_FTYPE_PV4DI_V4DI:
29151 case VOID_FTYPE_PV2DI_V2DI:
29152 case VOID_FTYPE_PCHAR_V32QI:
29153 case VOID_FTYPE_PCHAR_V16QI:
29154 case VOID_FTYPE_PFLOAT_V8SF:
29155 case VOID_FTYPE_PFLOAT_V4SF:
29156 case VOID_FTYPE_PDOUBLE_V4DF:
29157 case VOID_FTYPE_PDOUBLE_V2DF:
29158 case VOID_FTYPE_PLONGLONG_LONGLONG:
29159 case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
29166 case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
29172 case V8SF_FTYPE_PCV8SF_V8SI:
29173 case V4DF_FTYPE_PCV4DF_V4DI:
29174 case V4SF_FTYPE_PCV4SF_V4SI:
29175 case V2DF_FTYPE_PCV2DF_V2DI:
29176 case V8SI_FTYPE_PCV8SI_V8SI:
29177 case V4DI_FTYPE_PCV4DI_V4DI:
29178 case V4SI_FTYPE_PCV4SI_V4SI:
    case V2DI_FTYPE_PCV2DI_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
29184 case VOID_FTYPE_PV8SF_V8SI_V8SF:
29185 case VOID_FTYPE_PV4DF_V4DI_V4DF:
29186 case VOID_FTYPE_PV4SF_V4SI_V4SF:
29187 case VOID_FTYPE_PV2DF_V2DI_V2DF:
29188 case VOID_FTYPE_PV8SI_V8SI_V8SI:
29189 case VOID_FTYPE_PV4DI_V4DI_V4DI:
29190 case VOID_FTYPE_PV4SI_V4SI_V4SI:
    case VOID_FTYPE_PV2DI_V2DI_V2DI:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
29197 case VOID_FTYPE_UINT_UINT_UINT:
29198 case VOID_FTYPE_UINT64_UINT_UINT:
29199 case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));
29212 if (klass == store)
29214 arg = CALL_EXPR_ARG (exp, 0);
29215 op = expand_normal (arg);
29216 gcc_assert (target == 0);
      if (memory)
	{
	  if (GET_MODE (op) != Pmode)
	    op = convert_to_mode (Pmode, op, 1);
	  target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
	}
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || !register_operand (target, tmode)
	  || GET_MODE (target) != tmode)
	target = gen_reg_rtx (tmode);
    }
29237 for (i = 0; i < nargs; i++)
29239 enum machine_mode mode = insn_p->operand[i + 1].mode;
29242 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
29243 op = expand_normal (arg);
29244 match = insn_p->operand[i + 1].predicate (op, mode);
      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      if (GET_MODE (op) != Pmode)
		op = convert_to_mode (Pmode, op, 1);
	      op = gen_rtx_MEM (mode, force_reg (Pmode, op));
	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	    }
	  else
	    {
	      /* This must be a register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	      op = copy_to_mode_reg (mode, op);
	    }
	}
      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
29311 /* Return the integer constant in ARG. Constrain it to be in the range
29312 of the subparts of VEC_TYPE; issue an error if not. */
29315 get_element_number (tree vec_type, tree arg)
29317 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
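/* For example (illustrative): with a __v4sf first argument,
   TYPE_VECTOR_SUBPARTS is 4, so MAX is 3 and a call such as
   __builtin_ia32_vec_ext_v4sf (x, 5) is diagnosed here at compile
   time instead of silently wrapping around.  */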
/* A subroutine of ix86_expand_builtin.  These builtins are wrappers around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */
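/* Illustrative example of the two spellings (user level):

     __v2si a = (__v2si) { 1, 2 };	  generic vector syntax
     __m64  b = _mm_set_pi32 (2, 1);	  mmintrin.h wrapper

   Only the second form may be placed in an MMX register; by calling
   the wrapper the user accepts responsibility for issuing emms.  */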
29339 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
29341 enum machine_mode tmode = TYPE_MODE (type);
29342 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
29343 int i, n_elt = GET_MODE_NUNITS (tmode);
29344 rtvec v = rtvec_alloc (n_elt);
29346 gcc_assert (VECTOR_MODE_P (tmode));
29347 gcc_assert (call_expr_nargs (exp) == n_elt);
29349 for (i = 0; i < n_elt; ++i)
29351 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
29352 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
29355 if (!target || !register_operand (target, tmode))
29356 target = gen_reg_rtx (tmode);
  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
/* A subroutine of ix86_expand_builtin.  These builtins are wrappers around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */
29367 ix86_expand_vec_ext_builtin (tree exp, rtx target)
29369 enum machine_mode tmode, mode0;
29374 arg0 = CALL_EXPR_ARG (exp, 0);
29375 arg1 = CALL_EXPR_ARG (exp, 1);
29377 op0 = expand_normal (arg0);
29378 elt = get_element_number (TREE_TYPE (arg0), arg1);
29380 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
29381 mode0 = TYPE_MODE (TREE_TYPE (arg0));
29382 gcc_assert (VECTOR_MODE_P (mode0));
29384 op0 = force_reg (mode0, op0);
29386 if (optimize || !target || !register_operand (target, tmode))
29387 target = gen_reg_rtx (tmode);
  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
/* A subroutine of ix86_expand_builtin.  These builtins are wrappers around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */
29399 ix86_expand_vec_set_builtin (tree exp)
29401 enum machine_mode tmode, mode1;
29402 tree arg0, arg1, arg2;
29404 rtx op0, op1, target;
29406 arg0 = CALL_EXPR_ARG (exp, 0);
29407 arg1 = CALL_EXPR_ARG (exp, 1);
29408 arg2 = CALL_EXPR_ARG (exp, 2);
29410 tmode = TYPE_MODE (TREE_TYPE (arg0));
29411 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
29412 gcc_assert (VECTOR_MODE_P (tmode));
29414 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
29415 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
29416 elt = get_element_number (TREE_TYPE (arg0), arg2);
29418 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
29419 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
29421 op0 = force_reg (tmode, op0);
29422 op1 = force_reg (mode1, op1);
29424 /* OP0 is the source of these builtin functions and shouldn't be
29425 modified. Create a copy, use it and return it as target. */
29426 target = gen_reg_rtx (tmode);
29427 emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
29433 /* Expand an expression EXP that calls a built-in function,
29434 with result going to TARGET if that's convenient
29435 (and in mode MODE if that's convenient).
29436 SUBTARGET may be used as the target for computing one of EXP's operands.
29437 IGNORE is nonzero if the value is to be ignored. */
29440 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
29441 enum machine_mode mode ATTRIBUTE_UNUSED,
29442 int ignore ATTRIBUTE_UNUSED)
29444 const struct builtin_description *d;
29446 enum insn_code icode;
29447 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
29448 tree arg0, arg1, arg2, arg3, arg4;
29449 rtx op0, op1, op2, op3, op4, pat;
29450 enum machine_mode mode0, mode1, mode2, mode3, mode4;
29451 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
29453 /* Determine whether the builtin function is available under the current ISA.
29454 Originally the builtin was not created if it wasn't applicable to the
29455 current ISA based on the command line switches. With function specific
29456 options, we need to check in the context of the function making the call
29457 whether it is supported. */
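  /* For example (illustrative):

       __attribute__((target ("avx")))
       __m256d f (__m256d x, __m256d y) { return _mm256_add_pd (x, y); }

     compiled under plain -msse2 still expands the AVX builtin here,
     because the check below consults the ISA flags in effect for the
     function containing the call, not just the global command line.  */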
29458 if (ix86_builtins_isa[fcode].isa
29459 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
29461 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
29462 NULL, (enum fpmath_unit) 0, false);
      if (!opts)
	error ("%qE needs unknown isa option", fndecl);
      else
	{
	  gcc_assert (opts != NULL);
	  error ("%qE needs isa option %s", fndecl, opts);
	  free (opts);
	}
      return const0_rtx;
29477 case IX86_BUILTIN_MASKMOVQ:
29478 case IX86_BUILTIN_MASKMOVDQU:
29479 icode = (fcode == IX86_BUILTIN_MASKMOVQ
29480 ? CODE_FOR_mmx_maskmovq
29481 : CODE_FOR_sse2_maskmovdqu);
29482 /* Note the arg order is different from the operand order. */
29483 arg1 = CALL_EXPR_ARG (exp, 0);
29484 arg2 = CALL_EXPR_ARG (exp, 1);
29485 arg0 = CALL_EXPR_ARG (exp, 2);
29486 op0 = expand_normal (arg0);
29487 op1 = expand_normal (arg1);
29488 op2 = expand_normal (arg2);
29489 mode0 = insn_data[icode].operand[0].mode;
29490 mode1 = insn_data[icode].operand[1].mode;
29491 mode2 = insn_data[icode].operand[2].mode;
29493 if (GET_MODE (op0) != Pmode)
29494 op0 = convert_to_mode (Pmode, op0, 1);
29495 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
29497 if (!insn_data[icode].operand[0].predicate (op0, mode0))
29498 op0 = copy_to_mode_reg (mode0, op0);
29499 if (!insn_data[icode].operand[1].predicate (op1, mode1))
29500 op1 = copy_to_mode_reg (mode1, op1);
29501 if (!insn_data[icode].operand[2].predicate (op2, mode2))
29502 op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
29509 case IX86_BUILTIN_LDMXCSR:
29510 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
29511 target = assign_386_stack_local (SImode, SLOT_TEMP);
29512 emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;
29516 case IX86_BUILTIN_STMXCSR:
29517 target = assign_386_stack_local (SImode, SLOT_TEMP);
29518 emit_insn (gen_sse_stmxcsr (target));
29519 return copy_to_mode_reg (SImode, target);
29521 case IX86_BUILTIN_CLFLUSH:
29522 arg0 = CALL_EXPR_ARG (exp, 0);
29523 op0 = expand_normal (arg0);
29524 icode = CODE_FOR_sse2_clflush;
29525 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}
      emit_insn (gen_sse2_clflush (op0));
      return 0;
29535 case IX86_BUILTIN_MONITOR:
29536 arg0 = CALL_EXPR_ARG (exp, 0);
29537 arg1 = CALL_EXPR_ARG (exp, 1);
29538 arg2 = CALL_EXPR_ARG (exp, 2);
29539 op0 = expand_normal (arg0);
29540 op1 = expand_normal (arg1);
29541 op2 = expand_normal (arg2);
      if (!REG_P (op0))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;
29555 case IX86_BUILTIN_MWAIT:
29556 arg0 = CALL_EXPR_ARG (exp, 0);
29557 arg1 = CALL_EXPR_ARG (exp, 1);
29558 op0 = expand_normal (arg0);
29559 op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
29567 case IX86_BUILTIN_VEC_INIT_V2SI:
29568 case IX86_BUILTIN_VEC_INIT_V4HI:
29569 case IX86_BUILTIN_VEC_INIT_V8QI:
29570 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
29572 case IX86_BUILTIN_VEC_EXT_V2DF:
29573 case IX86_BUILTIN_VEC_EXT_V2DI:
29574 case IX86_BUILTIN_VEC_EXT_V4SF:
29575 case IX86_BUILTIN_VEC_EXT_V4SI:
29576 case IX86_BUILTIN_VEC_EXT_V8HI:
29577 case IX86_BUILTIN_VEC_EXT_V2SI:
29578 case IX86_BUILTIN_VEC_EXT_V4HI:
29579 case IX86_BUILTIN_VEC_EXT_V16QI:
29580 return ix86_expand_vec_ext_builtin (exp, target);
29582 case IX86_BUILTIN_VEC_SET_V2DI:
29583 case IX86_BUILTIN_VEC_SET_V4SF:
29584 case IX86_BUILTIN_VEC_SET_V4SI:
29585 case IX86_BUILTIN_VEC_SET_V8HI:
29586 case IX86_BUILTIN_VEC_SET_V4HI:
29587 case IX86_BUILTIN_VEC_SET_V16QI:
29588 return ix86_expand_vec_set_builtin (exp);
29590 case IX86_BUILTIN_INFQ:
29591 case IX86_BUILTIN_HUGE_VALQ:
      {
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }
29608 case IX86_BUILTIN_LLWPCB:
29609 arg0 = CALL_EXPR_ARG (exp, 0);
29610 op0 = expand_normal (arg0);
29611 icode = CODE_FOR_lwp_llwpcb;
29612 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;
29621 case IX86_BUILTIN_SLWPCB:
29622 icode = CODE_FOR_lwp_slwpcb;
      if (!target
	  || !insn_data[icode].operand[0].predicate (target, Pmode))
	target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;
29629 case IX86_BUILTIN_BEXTRI32:
29630 case IX86_BUILTIN_BEXTRI64:
29631 arg0 = CALL_EXPR_ARG (exp, 0);
29632 arg1 = CALL_EXPR_ARG (exp, 1);
29633 op0 = expand_normal (arg0);
29634 op1 = expand_normal (arg1);
29635 icode = (fcode == IX86_BUILTIN_BEXTRI32
29636 ? CODE_FOR_tbm_bextri_si
29637 : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
	{
	  error ("last argument must be an immediate");
	  return const0_rtx;
	}
      else
	{
	  unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
	  unsigned char lsb_index = INTVAL (op1) & 0xFF;
	  op1 = GEN_INT (length);
	  op2 = GEN_INT (lsb_index);
	  pat = GEN_FCN (icode) (target, op0, op1, op2);
	  if (pat)
	    emit_insn (pat);
	  return target;
	}
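      /* Illustrative decode of the control word handled above
	 (hypothetical value): imm 0x0504 asks BEXTR for LENGTH = 5
	 bits starting at LSB_INDEX = 4, i.e.

	   result = (src >> 4) & ((1 << 5) - 1);

	 The single user-level immediate is split into the two
	 immediate operands the insn pattern expects.  */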
    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

rdrand_step:
29670 op0 = gen_reg_rtx (mode0);
29671 emit_insn (GEN_FCN (icode) (op0));
29673 arg0 = CALL_EXPR_ARG (exp, 0);
29674 op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op1);
	}
29680 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
29682 op1 = gen_reg_rtx (SImode);
29683 emit_move_insn (op1, CONST1_RTX (SImode));
29685 /* Emit SImode conditional move. */
      if (mode0 == HImode)
	{
	  op2 = gen_reg_rtx (SImode);
	  emit_insn (gen_zero_extendhisi2 (op2, op0));
	}
      else if (mode0 == SImode)
	op2 = op0;
      else
	op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0)
	target = gen_reg_rtx (SImode);
      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;
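      /* Illustrative user-level contract of the _step builtins:

	   unsigned int v;
	   while (!_rdrand32_step (&v))
	     ;	     retry until the hardware returns valid entropy

	 The random value is stored through the pointer argument, and
	 the carry flag, materialized by the conditional move above,
	 becomes the int return value.  */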
    case IX86_BUILTIN_GATHERSIV2DF:
      icode = CODE_FOR_avx2_gathersiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DF:
      icode = CODE_FOR_avx2_gatherdiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DF:
      icode = CODE_FOR_avx2_gatherdiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SF:
      icode = CODE_FOR_avx2_gathersiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SF:
      icode = CODE_FOR_avx2_gathersiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SF:
      icode = CODE_FOR_avx2_gatherdiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV2DI:
      icode = CODE_FOR_avx2_gathersiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DI:
      icode = CODE_FOR_avx2_gatherdiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DI:
      icode = CODE_FOR_avx2_gatherdiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SI:
      icode = CODE_FOR_avx2_gathersiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SI:
      icode = CODE_FOR_avx2_gathersiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SI:
      icode = CODE_FOR_avx2_gatherdiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;

    gather_gen:
29767 arg0 = CALL_EXPR_ARG (exp, 0);
29768 arg1 = CALL_EXPR_ARG (exp, 1);
29769 arg2 = CALL_EXPR_ARG (exp, 2);
29770 arg3 = CALL_EXPR_ARG (exp, 3);
29771 arg4 = CALL_EXPR_ARG (exp, 4);
29772 op0 = expand_normal (arg0);
29773 op1 = expand_normal (arg1);
29774 op2 = expand_normal (arg2);
29775 op3 = expand_normal (arg3);
29776 op4 = expand_normal (arg4);
29777 /* Note the arg order is different from the operand order. */
29778 mode0 = insn_data[icode].operand[1].mode;
29779 mode2 = insn_data[icode].operand[3].mode;
29780 mode3 = insn_data[icode].operand[4].mode;
29781 mode4 = insn_data[icode].operand[5].mode;
29783 if (target == NULL_RTX
29784 || GET_MODE (target) != insn_data[icode].operand[0].mode
29785 || !insn_data[icode].operand[0].predicate (target,
29786 GET_MODE (target)))
	subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
      else
	subtarget = target;
29791 if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
29792 || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
	{
	  rtx half = gen_reg_rtx (V4SImode);
29795 if (!nonimmediate_operand (op2, V8SImode))
29796 op2 = copy_to_mode_reg (V8SImode, op2);
	  emit_insn (gen_vec_extract_lo_v8si (half, op2));
	  op2 = half;
	}
29800 else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
29801 || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
	{
	  rtx (*gen) (rtx, rtx);
29804 rtx half = gen_reg_rtx (mode0);
29805 if (mode0 == V4SFmode)
29806 gen = gen_vec_extract_lo_v8sf;
29808 gen = gen_vec_extract_lo_v8si;
29809 if (!nonimmediate_operand (op0, GET_MODE (op0)))
29810 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
	  emit_insn (gen (half, op0));
	  op0 = half;
29813 if (!nonimmediate_operand (op3, GET_MODE (op3)))
29814 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
	  emit_insn (gen (half, op3));
	  op3 = half;
	}
      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
29822 if (GET_MODE (op1) != Pmode)
29823 op1 = convert_to_mode (Pmode, op1, 1);
29824 op1 = force_reg (Pmode, op1);
29826 if (!insn_data[icode].operand[1].predicate (op0, mode0))
29827 op0 = copy_to_mode_reg (mode0, op0);
29828 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
29829 op1 = copy_to_mode_reg (Pmode, op1);
29830 if (!insn_data[icode].operand[3].predicate (op2, mode2))
29831 op2 = copy_to_mode_reg (mode2, op2);
29832 if (!insn_data[icode].operand[4].predicate (op3, mode3))
29833 op3 = copy_to_mode_reg (mode3, op3);
      if (!insn_data[icode].operand[5].predicate (op4, mode4))
	{
	  error ("last argument must be scale 1, 2, 4, 8");
	  return const0_rtx;
	}
29840 /* Optimize. If mask is known to have all high bits set,
29841 replace op0 with pc_rtx to signal that the instruction
29842 overwrites the whole destination and doesn't use its
29843 previous contents. */
      if (TREE_CODE (arg3) == VECTOR_CST)
	{
	  tree elt;
	  unsigned int negative = 0;
	  for (elt = TREE_VECTOR_CST_ELTS (arg3);
	       elt; elt = TREE_CHAIN (elt))
	    {
	      tree cst = TREE_VALUE (elt);
	      if (TREE_CODE (cst) == INTEGER_CST
		  && tree_int_cst_sign_bit (cst))
		negative++;
	      else if (TREE_CODE (cst) == REAL_CST
		       && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
		negative++;
	    }
	  if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
	    op0 = pc_rtx;
	}
      else if (TREE_CODE (arg3) == SSA_NAME)
	{
	  /* Recognize also when mask is like:
	     __v2df src = _mm_setzero_pd ();
	     __v2df mask = _mm_cmpeq_pd (src, src);
	     or
	     __v8sf src = _mm256_setzero_ps ();
	     __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
	     as that is a cheaper way to load all ones into
	     a register than having to load a constant from
	     memory.  */
	  gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
	  if (is_gimple_call (def_stmt))
	    {
	      tree fndecl = gimple_call_fndecl (def_stmt);
	      if (fndecl
		  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
		switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
		  {
		  case IX86_BUILTIN_CMPPD:
		  case IX86_BUILTIN_CMPPS:
		  case IX86_BUILTIN_CMPPD256:
		  case IX86_BUILTIN_CMPPS256:
		    if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
		      break;
		    /* FALLTHRU */
		  case IX86_BUILTIN_CMPEQPD:
		  case IX86_BUILTIN_CMPEQPS:
		    if (initializer_zerop (gimple_call_arg (def_stmt, 0))
			&& initializer_zerop (gimple_call_arg (def_stmt, 1)))
		      op0 = pc_rtx;
		    break;
		  default:
		    break;
		  }
	    }
	}

      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
      if (! pat)
	return const0_rtx;
      emit_insn (pat);
      if (fcode == IX86_BUILTIN_GATHERDIV8SF
	  || fcode == IX86_BUILTIN_GATHERDIV8SI)
	{
	  enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
				    ? V4SFmode : V4SImode;
	  if (target == NULL_RTX)
	    target = gen_reg_rtx (tmode);
	  if (tmode == V4SFmode)
	    emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
	  else
	    emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
	}
      else
	target = subtarget;

      return target;

    default:
      break;
    }
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
	{
	case IX86_BUILTIN_FABSQ:
	case IX86_BUILTIN_COPYSIGNQ:
	  if (!TARGET_SSE2)
	    /* Emit a normal call if SSE2 isn't available.  */
	    return expand_call (exp, target, ignore);
	default:
	  return ix86_expand_args_builtin (d, exp, target);
	}
29951 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
29952 if (d->code == fcode)
29953 return ix86_expand_sse_comi (d, exp, target);
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);
29967 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
29968 if (d->code == fcode)
29969 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
29970 (enum ix86_builtin_func_type)
29971 d->flag, d->comparison);
29973 gcc_unreachable ();
29976 /* Returns a function decl for a vectorized version of the builtin function
29977 with builtin function code FN and the result vector type TYPE, or NULL_TREE
29978 if it is not available. */
static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
				  tree type_in)
29984 enum machine_mode in_mode, out_mode;
29986 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29988 if (TREE_CODE (type_out) != VECTOR_TYPE
29989 || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;
29993 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29994 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29995 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29996 in_n = TYPE_VECTOR_SUBPARTS (type_in);
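  /* Illustrative query (hypothetical loop): when vectorizing

       for (i = 0; i < n; i++)
	 out[i] = sqrt (in[i]);	     double arrays, compiled with -mavx

     the vectorizer calls this hook with fn == BUILT_IN_SQRT and V4DF
     vector types, and is handed IX86_BUILTIN_SQRTPD256 below, i.e. a
     256-bit VSQRTPD.  */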
30000 case BUILT_IN_SQRT:
30001 if (out_mode == DFmode && in_mode == DFmode)
30003 if (out_n == 2 && in_n == 2)
30004 return ix86_builtins[IX86_BUILTIN_SQRTPD];
30005 else if (out_n == 4 && in_n == 4)
30006 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
30010 case BUILT_IN_SQRTF:
30011 if (out_mode == SFmode && in_mode == SFmode)
30013 if (out_n == 4 && in_n == 4)
30014 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
30015 else if (out_n == 8 && in_n == 8)
30016 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
30020 case BUILT_IN_IFLOOR:
30021 case BUILT_IN_LFLOOR:
30022 case BUILT_IN_LLFLOOR:
30023 /* The round insn does not trap on denormals. */
30024 if (flag_trapping_math || !TARGET_ROUND)
30027 if (out_mode == SImode && in_mode == DFmode)
30029 if (out_n == 4 && in_n == 2)
30030 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
30031 else if (out_n == 8 && in_n == 4)
30032 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
30036 case BUILT_IN_IFLOORF:
30037 case BUILT_IN_LFLOORF:
30038 case BUILT_IN_LLFLOORF:
30039 /* The round insn does not trap on denormals. */
30040 if (flag_trapping_math || !TARGET_ROUND)
30043 if (out_mode == SImode && in_mode == SFmode)
30045 if (out_n == 4 && in_n == 4)
30046 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
30047 else if (out_n == 8 && in_n == 8)
30048 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
30052 case BUILT_IN_ICEIL:
30053 case BUILT_IN_LCEIL:
30054 case BUILT_IN_LLCEIL:
30055 /* The round insn does not trap on denormals. */
30056 if (flag_trapping_math || !TARGET_ROUND)
30059 if (out_mode == SImode && in_mode == DFmode)
30061 if (out_n == 4 && in_n == 2)
30062 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
30063 else if (out_n == 8 && in_n == 4)
30064 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
30068 case BUILT_IN_ICEILF:
30069 case BUILT_IN_LCEILF:
30070 case BUILT_IN_LLCEILF:
30071 /* The round insn does not trap on denormals. */
30072 if (flag_trapping_math || !TARGET_ROUND)
30075 if (out_mode == SImode && in_mode == SFmode)
30077 if (out_n == 4 && in_n == 4)
30078 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
30079 else if (out_n == 8 && in_n == 8)
30080 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
30084 case BUILT_IN_IRINT:
30085 case BUILT_IN_LRINT:
30086 case BUILT_IN_LLRINT:
30087 if (out_mode == SImode && in_mode == DFmode)
30089 if (out_n == 4 && in_n == 2)
30090 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
30091 else if (out_n == 8 && in_n == 4)
30092 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
30096 case BUILT_IN_IRINTF:
30097 case BUILT_IN_LRINTF:
30098 case BUILT_IN_LLRINTF:
30099 if (out_mode == SImode && in_mode == SFmode)
30101 if (out_n == 4 && in_n == 4)
30102 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
30103 else if (out_n == 8 && in_n == 8)
30104 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
30108 case BUILT_IN_IROUND:
30109 case BUILT_IN_LROUND:
30110 case BUILT_IN_LLROUND:
30111 /* The round insn does not trap on denormals. */
30112 if (flag_trapping_math || !TARGET_ROUND)
30115 if (out_mode == SImode && in_mode == DFmode)
30117 if (out_n == 4 && in_n == 2)
30118 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
30119 else if (out_n == 8 && in_n == 4)
30120 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
30124 case BUILT_IN_IROUNDF:
30125 case BUILT_IN_LROUNDF:
30126 case BUILT_IN_LLROUNDF:
30127 /* The round insn does not trap on denormals. */
30128 if (flag_trapping_math || !TARGET_ROUND)
30131 if (out_mode == SImode && in_mode == SFmode)
30133 if (out_n == 4 && in_n == 4)
30134 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
30135 else if (out_n == 8 && in_n == 8)
30136 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
30140 case BUILT_IN_COPYSIGN:
30141 if (out_mode == DFmode && in_mode == DFmode)
30143 if (out_n == 2 && in_n == 2)
30144 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
30145 else if (out_n == 4 && in_n == 4)
30146 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
30150 case BUILT_IN_COPYSIGNF:
30151 if (out_mode == SFmode && in_mode == SFmode)
30153 if (out_n == 4 && in_n == 4)
30154 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
30155 else if (out_n == 8 && in_n == 8)
30156 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
30160 case BUILT_IN_FLOOR:
30161 /* The round insn does not trap on denormals. */
30162 if (flag_trapping_math || !TARGET_ROUND)
30165 if (out_mode == DFmode && in_mode == DFmode)
30167 if (out_n == 2 && in_n == 2)
30168 return ix86_builtins[IX86_BUILTIN_FLOORPD];
30169 else if (out_n == 4 && in_n == 4)
30170 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
30174 case BUILT_IN_FLOORF:
30175 /* The round insn does not trap on denormals. */
30176 if (flag_trapping_math || !TARGET_ROUND)
30179 if (out_mode == SFmode && in_mode == SFmode)
30181 if (out_n == 4 && in_n == 4)
30182 return ix86_builtins[IX86_BUILTIN_FLOORPS];
30183 else if (out_n == 8 && in_n == 8)
30184 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
30188 case BUILT_IN_CEIL:
30189 /* The round insn does not trap on denormals. */
30190 if (flag_trapping_math || !TARGET_ROUND)
30193 if (out_mode == DFmode && in_mode == DFmode)
30195 if (out_n == 2 && in_n == 2)
30196 return ix86_builtins[IX86_BUILTIN_CEILPD];
30197 else if (out_n == 4 && in_n == 4)
30198 return ix86_builtins[IX86_BUILTIN_CEILPD256];
30202 case BUILT_IN_CEILF:
30203 /* The round insn does not trap on denormals. */
30204 if (flag_trapping_math || !TARGET_ROUND)
30207 if (out_mode == SFmode && in_mode == SFmode)
30209 if (out_n == 4 && in_n == 4)
30210 return ix86_builtins[IX86_BUILTIN_CEILPS];
30211 else if (out_n == 8 && in_n == 8)
30212 return ix86_builtins[IX86_BUILTIN_CEILPS256];
30216 case BUILT_IN_TRUNC:
30217 /* The round insn does not trap on denormals. */
30218 if (flag_trapping_math || !TARGET_ROUND)
30221 if (out_mode == DFmode && in_mode == DFmode)
30223 if (out_n == 2 && in_n == 2)
30224 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
30225 else if (out_n == 4 && in_n == 4)
30226 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
30230 case BUILT_IN_TRUNCF:
30231 /* The round insn does not trap on denormals. */
30232 if (flag_trapping_math || !TARGET_ROUND)
30235 if (out_mode == SFmode && in_mode == SFmode)
30237 if (out_n == 4 && in_n == 4)
30238 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
30239 else if (out_n == 8 && in_n == 8)
30240 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
30244 case BUILT_IN_RINT:
30245 /* The round insn does not trap on denormals. */
30246 if (flag_trapping_math || !TARGET_ROUND)
30249 if (out_mode == DFmode && in_mode == DFmode)
30251 if (out_n == 2 && in_n == 2)
30252 return ix86_builtins[IX86_BUILTIN_RINTPD];
30253 else if (out_n == 4 && in_n == 4)
30254 return ix86_builtins[IX86_BUILTIN_RINTPD256];
30258 case BUILT_IN_RINTF:
30259 /* The round insn does not trap on denormals. */
30260 if (flag_trapping_math || !TARGET_ROUND)
30263 if (out_mode == SFmode && in_mode == SFmode)
30265 if (out_n == 4 && in_n == 4)
30266 return ix86_builtins[IX86_BUILTIN_RINTPS];
30267 else if (out_n == 8 && in_n == 8)
30268 return ix86_builtins[IX86_BUILTIN_RINTPS256];
30272 case BUILT_IN_ROUND:
30273 /* The round insn does not trap on denormals. */
30274 if (flag_trapping_math || !TARGET_ROUND)
30277 if (out_mode == DFmode && in_mode == DFmode)
30279 if (out_n == 2 && in_n == 2)
30280 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
30281 else if (out_n == 4 && in_n == 4)
30282 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
30286 case BUILT_IN_ROUNDF:
30287 /* The round insn does not trap on denormals. */
30288 if (flag_trapping_math || !TARGET_ROUND)
30291 if (out_mode == SFmode && in_mode == SFmode)
30293 if (out_n == 4 && in_n == 4)
30294 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
30295 else if (out_n == 8 && in_n == 8)
30296 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
      break;

    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
30303 if (out_n == 2 && in_n == 2)
30304 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
30305 if (out_n == 4 && in_n == 4)
30306 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
30310 case BUILT_IN_FMAF:
30311 if (out_mode == SFmode && in_mode == SFmode)
30313 if (out_n == 4 && in_n == 4)
30314 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
30315 if (out_n == 8 && in_n == 8)
30316 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
30324 /* Dispatch to a handler for a vectorization library. */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
				type_in);

  return NULL_TREE;
30332 /* Handler for an SVML-style interface to
30333 a library with vectorized intrinsics. */
30336 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
30339 tree fntype, new_fndecl, args;
30342 enum machine_mode el_mode, in_mode;
  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;
30349 el_mode = TYPE_MODE (TREE_TYPE (type_out));
30350 n = TYPE_VECTOR_SUBPARTS (type_out);
30351 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30352 in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;
30361 case BUILT_IN_LOG10:
30363 case BUILT_IN_TANH:
30365 case BUILT_IN_ATAN:
30366 case BUILT_IN_ATAN2:
30367 case BUILT_IN_ATANH:
30368 case BUILT_IN_CBRT:
30369 case BUILT_IN_SINH:
30371 case BUILT_IN_ASINH:
30372 case BUILT_IN_ASIN:
30373 case BUILT_IN_COSH:
30375 case BUILT_IN_ACOSH:
30376 case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
	return NULL_TREE;
      break;
30381 case BUILT_IN_EXPF:
30382 case BUILT_IN_LOGF:
30383 case BUILT_IN_LOG10F:
30384 case BUILT_IN_POWF:
30385 case BUILT_IN_TANHF:
30386 case BUILT_IN_TANF:
30387 case BUILT_IN_ATANF:
30388 case BUILT_IN_ATAN2F:
30389 case BUILT_IN_ATANHF:
30390 case BUILT_IN_CBRTF:
30391 case BUILT_IN_SINHF:
30392 case BUILT_IN_SINF:
30393 case BUILT_IN_ASINHF:
30394 case BUILT_IN_ASINF:
30395 case BUILT_IN_COSHF:
30396 case BUILT_IN_COSF:
30397 case BUILT_IN_ACOSHF:
30398 case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
	return NULL_TREE;
      break;
30407 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
30409 if (fn == BUILT_IN_LOGF)
30410 strcpy (name, "vmlsLn4");
30411 else if (fn == BUILT_IN_LOG)
30412 strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;
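  /* Worked example (illustrative): for BUILT_IN_SINF, bname is
     "__builtin_sinf" and bname+10 is "sinf".  The n == 4 branch
     produces "vmlssin4" (the trailing 'f' is overwritten by '4') and
     the case fix-up above yields the SVML entry point "vmlsSin4";
     BUILT_IN_SIN likewise becomes "vmldSin2".  */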
  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
30435 /* Build a function declaration for the vectorized function. */
30436 new_fndecl = build_decl (BUILTINS_LOCATION,
30437 FUNCTION_DECL, get_identifier (name), fntype);
30438 TREE_PUBLIC (new_fndecl) = 1;
30439 DECL_EXTERNAL (new_fndecl) = 1;
30440 DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
30446 /* Handler for an ACML-style interface to
30447 a library with vectorized intrinsics. */
30450 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
30452 char name[20] = "__vr.._";
30453 tree fntype, new_fndecl, args;
30456 enum machine_mode el_mode, in_mode;
  /* The ACML is 64-bit only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;
30466 el_mode = TYPE_MODE (TREE_TYPE (type_out));
30467 n = TYPE_VECTOR_SUBPARTS (type_out);
30468 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30469 in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;
30480 case BUILT_IN_LOG2:
30481 case BUILT_IN_LOG10:
      if (el_mode != DFmode
	  || n != 2)
	return NULL_TREE;
      break;
30489 case BUILT_IN_SINF:
30490 case BUILT_IN_COSF:
30491 case BUILT_IN_EXPF:
30492 case BUILT_IN_POWF:
30493 case BUILT_IN_LOGF:
30494 case BUILT_IN_LOG2F:
30495 case BUILT_IN_LOG10F:
      if (el_mode != SFmode
	  || n != 4)
	return NULL_TREE;
      break;
30507 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
30508 sprintf (name + 7, "%s", bname+10);
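  /* Worked example (illustrative): BUILT_IN_SIN fills the "__vr.._"
     template with 'd' and '2' and appends bname+10, giving the ACML
     entry point "__vrd2_sin"; BUILT_IN_SINF likewise yields
     "__vrs4_sinf".  */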
  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
30521 /* Build a function declaration for the vectorized function. */
30522 new_fndecl = build_decl (BUILTINS_LOCATION,
30523 FUNCTION_DECL, get_identifier (name), fntype);
30524 TREE_PUBLIC (new_fndecl) = 1;
30525 DECL_EXTERNAL (new_fndecl) = 1;
30526 DECL_IS_NOVOPS (new_fndecl) = 1;
30527 TREE_READONLY (new_fndecl) = 1;
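/* Worked example for the ACML mangling: the two dots in the "__vr.._"
   template are filled in from the element mode and the vector width,
   so BUILT_IN_SIN becomes "__vrd2_sin" and BUILT_IN_SINF becomes
   "__vrs4_sinf", with bname+10 again skipping the "__builtin_"
   prefix.  */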
30532 /* Returns a decl of a function that implements gather load with
30533 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
30534 Return NULL_TREE if it is not available. */
30537 ix86_vectorize_builtin_gather (const_tree mem_vectype,
30538 const_tree index_type, int scale)
30541 enum ix86_builtins code;
30546 if ((TREE_CODE (index_type) != INTEGER_TYPE
30547 && !POINTER_TYPE_P (index_type))
30548 || (TYPE_MODE (index_type) != SImode
30549 && TYPE_MODE (index_type) != DImode))
30552 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
30555 /* v*gather* insn sign extends index to pointer mode. */
30556 if (TYPE_PRECISION (index_type) < POINTER_SIZE
30557 && TYPE_UNSIGNED (index_type))
30562 || (scale & (scale - 1)) != 0)
30565 si = TYPE_MODE (index_type) == SImode;
30566 switch (TYPE_MODE (mem_vectype))
30569 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
30572 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
30575 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
30578 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
30581 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
30584 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
30587 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
30590 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
30596 return ix86_builtins[code];
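/* For example, a V2DF gather with DImode indices selects
   IX86_BUILTIN_GATHERDIV2DF above.  The "ALT" entries cover the
   mismatched cases (e.g. SImode indices into a V4DF result) where
   the index vector and the data vector have different element
   counts.  */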
/* Returns a decl of a target-specific builtin that implements the
   reciprocal of the function, or NULL_TREE if not available.  */
30603 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
30604 bool sqrt ATTRIBUTE_UNUSED)
30606 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
30607 && flag_finite_math_only && !flag_trapping_math
30608 && flag_unsafe_math_optimizations))
30612 /* Machine dependent builtins. */
30615 /* Vectorized version of sqrt to rsqrt conversion. */
30616 case IX86_BUILTIN_SQRTPS_NR:
30617 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
30619 case IX86_BUILTIN_SQRTPS_NR256:
30620 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
30626 /* Normal builtins. */
30629 /* Sqrt to rsqrt conversion. */
30630 case BUILT_IN_SQRTF:
30631 return ix86_builtins[IX86_BUILTIN_RSQRTF];
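/* The rsqrt builtins returned above only approximate 1/sqrt(x): the
   expanders refine the roughly 12-bit rsqrtss/rsqrtps estimate with a
   Newton-Raphson step, x1 = x0 * (1.5 - 0.5 * a * x0 * x0), which is
   why the finite/unsafe-math guards above are required.  */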
30638 /* Helper for avx_vpermilps256_operand et al. This is also used by
30639 the expansion functions to turn the parallel back into a mask.
30640 The return value is 0 for no match and the imm8+1 for a match. */
30643 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
30645 unsigned i, nelt = GET_MODE_NUNITS (mode);
30647 unsigned char ipar[8];
30649 if (XVECLEN (par, 0) != (int) nelt)
30652 /* Validate that all of the elements are constants, and not totally
30653 out of range. Copy the data into an integral array to make the
30654 subsequent checks easier. */
30655 for (i = 0; i < nelt; ++i)
30657 rtx er = XVECEXP (par, 0, i);
30658 unsigned HOST_WIDE_INT ei;
30660 if (!CONST_INT_P (er))
/* In the 256-bit DFmode case, we can only move elements within
   a 128-bit lane.  */
30673 for (i = 0; i < 2; ++i)
30677 mask |= ipar[i] << i;
30679 for (i = 2; i < 4; ++i)
30683 mask |= (ipar[i] - 2) << i;
30688 /* In the 256-bit SFmode case, we have full freedom of movement
30689 within the low 128-bit lane, but the high 128-bit lane must
30690 mirror the exact same pattern. */
30691 for (i = 0; i < 4; ++i)
30692 if (ipar[i] + 4 != ipar[i + 4])
/* In the 128-bit case, we have full freedom in the placement of
   the elements from the source operand.  */
30701 for (i = 0; i < nelt; ++i)
30702 mask |= ipar[i] << (i * (nelt / 2));
30706 gcc_unreachable ();
30709 /* Make sure success has a non-zero value by adding one. */
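/* A self-contained sketch of the V4SF arm above (the helper name is
   illustrative, not part of GCC): each 2-bit field of the imm8 picks
   the source element for one destination slot.  */

static unsigned char
vpermilps_imm8_sketch (const unsigned char sel[4])
{
  unsigned char imm = 0;
  unsigned int i;

  for (i = 0; i < 4; ++i)
    imm |= (unsigned char) ((sel[i] & 3) << (i * 2));	/* mask |= ipar[i] << (i * 2).  */
  return imm;
}

/* E.g. the selector {1, 0, 3, 2} encodes as 1 | 0<<2 | 3<<4 | 2<<6
   == 0xb1, so avx_vpermilp_parallel returns 0xb1 + 1 for the
   corresponding parallel.  */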
30713 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
30714 the expansion functions to turn the parallel back into a mask.
30715 The return value is 0 for no match and the imm8+1 for a match. */
30718 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
30720 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
30722 unsigned char ipar[8];
30724 if (XVECLEN (par, 0) != (int) nelt)
30727 /* Validate that all of the elements are constants, and not totally
30728 out of range. Copy the data into an integral array to make the
30729 subsequent checks easier. */
30730 for (i = 0; i < nelt; ++i)
30732 rtx er = XVECEXP (par, 0, i);
30733 unsigned HOST_WIDE_INT ei;
30735 if (!CONST_INT_P (er))
30738 if (ei >= 2 * nelt)
/* Validate that each half of the permute selects consecutive
   elements, i.e. really is a half.  */
30744 for (i = 0; i < nelt2 - 1; ++i)
30745 if (ipar[i] + 1 != ipar[i + 1])
30747 for (i = nelt2; i < nelt - 1; ++i)
30748 if (ipar[i] + 1 != ipar[i + 1])
30751 /* Reconstruct the mask. */
30752 for (i = 0; i < 2; ++i)
30754 unsigned e = ipar[i * nelt2];
30758 mask |= e << (i * 4);
30761 /* Make sure success has a non-zero value by adding one. */
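/* Worked example: for a V4DF parallel selecting elements (2 3 0 1),
   the low half starts at element 2 and the high half at element 0,
   so the lane selectors are 1 and 0, mask == 0x01, and the function
   returns 0x01 + 1.  */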
30765 /* Store OPERAND to the memory after reload is completed. This means
30766 that we can't easily use assign_stack_local. */
30768 ix86_force_to_memory (enum machine_mode mode, rtx operand)
30772 gcc_assert (reload_completed);
30773 if (ix86_using_red_zone ())
30775 result = gen_rtx_MEM (mode,
30776 gen_rtx_PLUS (Pmode,
30778 GEN_INT (-RED_ZONE_SIZE)));
30779 emit_move_insn (result, operand);
30781 else if (TARGET_64BIT)
30787 operand = gen_lowpart (DImode, operand);
30791 gen_rtx_SET (VOIDmode,
30792 gen_rtx_MEM (DImode,
30793 gen_rtx_PRE_DEC (DImode,
30794 stack_pointer_rtx)),
30798 gcc_unreachable ();
30800 result = gen_rtx_MEM (mode, stack_pointer_rtx);
30809 split_double_mode (mode, &operand, 1, operands, operands + 1);
30811 gen_rtx_SET (VOIDmode,
30812 gen_rtx_MEM (SImode,
30813 gen_rtx_PRE_DEC (Pmode,
30814 stack_pointer_rtx)),
30817 gen_rtx_SET (VOIDmode,
30818 gen_rtx_MEM (SImode,
30819 gen_rtx_PRE_DEC (Pmode,
30820 stack_pointer_rtx)),
30825 /* Store HImodes as SImodes. */
30826 operand = gen_lowpart (SImode, operand);
30830 gen_rtx_SET (VOIDmode,
30831 gen_rtx_MEM (GET_MODE (operand),
30832 gen_rtx_PRE_DEC (SImode,
30833 stack_pointer_rtx)),
30837 gcc_unreachable ();
30839 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* Release the memory slot allocated by ix86_force_to_memory.  */
30846 ix86_free_from_memory (enum machine_mode mode)
30848 if (!ix86_using_red_zone ())
30852 if (mode == DImode || TARGET_64BIT)
/* Use LEA to deallocate stack space.  In peephole2 it will be converted
   to a pop or add instruction if registers are available.  */
30858 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
30859 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
30864 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
30866 Put float CONST_DOUBLE in the constant pool instead of fp regs.
30867 QImode must go into class Q_REGS.
30868 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
30869 movdf to do mem-to-mem moves through integer regs. */
30872 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
30874 enum machine_mode mode = GET_MODE (x);
30876 /* We're only allowed to return a subclass of CLASS. Many of the
30877 following checks fail for NO_REGS, so eliminate that early. */
30878 if (regclass == NO_REGS)
30881 /* All classes can load zeros. */
30882 if (x == CONST0_RTX (mode))
30885 /* Force constants into memory if we are loading a (nonzero) constant into
30886 an MMX or SSE register. This is because there are no MMX/SSE instructions
30887 to load from a constant. */
30889 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
30892 /* Prefer SSE regs only, if we can use them for math. */
30893 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
30894 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
30896 /* Floating-point constants need more complex checks. */
30897 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
30899 /* General regs can load everything. */
30900 if (reg_class_subset_p (regclass, GENERAL_REGS))
30903 /* Floats can load 0 and 1 plus some others. Note that we eliminated
30904 zero above. We only want to wind up preferring 80387 registers if
30905 we plan on doing computation with them. */
30907 && standard_80387_constant_p (x) > 0)
30909 /* Limit class to non-sse. */
30910 if (regclass == FLOAT_SSE_REGS)
30912 if (regclass == FP_TOP_SSE_REGS)
30914 if (regclass == FP_SECOND_SSE_REGS)
30915 return FP_SECOND_REG;
30916 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
/* Generally when we see PLUS here, it's the function invariant
   (plus soft-fp const_int), which can only be computed into general
   regs.  */
30926 if (GET_CODE (x) == PLUS)
30927 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
30929 /* QImode constants are easy to load, but non-constant QImode data
30930 must go into Q_REGS. */
30931 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
30933 if (reg_class_subset_p (regclass, Q_REGS))
30935 if (reg_class_subset_p (Q_REGS, regclass))
30943 /* Discourage putting floating-point values in SSE registers unless
30944 SSE math is being used, and likewise for the 387 registers. */
30946 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
30948 enum machine_mode mode = GET_MODE (x);
/* Restrict the output reload class to the register bank that we are doing
   math on.  If we would like not to return a subset of CLASS, reject this
   alternative: if reload cannot do this, it will still use its choice.  */
30954 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
30955 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
30957 if (X87_FLOAT_MODE_P (mode))
30959 if (regclass == FP_TOP_SSE_REGS)
30961 else if (regclass == FP_SECOND_SSE_REGS)
30962 return FP_SECOND_REG;
30964 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
30971 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
30972 enum machine_mode mode, secondary_reload_info *sri)
30974 /* Double-word spills from general registers to non-offsettable memory
30975 references (zero-extended addresses) require special handling. */
30978 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
30979 && rclass == GENERAL_REGS
30980 && !offsettable_memref_p (x))
30983 ? CODE_FOR_reload_noff_load
30984 : CODE_FOR_reload_noff_store);
30985 /* Add the cost of moving address to a temporary. */
30986 sri->extra_cost = 1;
/* QImode spills from non-QI registers require an
   intermediate register on 32-bit targets.  */
30994 && !in_p && mode == QImode
30995 && (rclass == GENERAL_REGS
30996 || rclass == LEGACY_REGS
30997 || rclass == INDEX_REGS))
31006 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
31007 regno = true_regnum (x);
31009 /* Return Q_REGS if the operand is in memory. */
31014 /* This condition handles corner case where an expression involving
31015 pointers gets vectorized. We're trying to use the address of a
31016 stack slot as a vector initializer.
31018 (set (reg:V2DI 74 [ vect_cst_.2 ])
31019 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
31021 Eventually frame gets turned into sp+offset like this:
31023 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31024 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
31025 (const_int 392 [0x188]))))
31027 That later gets turned into:
31029 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31030 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
31031 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
31033 We'll have the following reload recorded:
31035 Reload 0: reload_in (DI) =
31036 (plus:DI (reg/f:DI 7 sp)
31037 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
31038 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31039 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
31040 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
31041 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31042 reload_reg_rtx: (reg:V2DI 22 xmm1)
31044 Which isn't going to work since SSE instructions can't handle scalar
31045 additions. Returning GENERAL_REGS forces the addition into integer
31046 register and reload can handle subsequent reloads without problems. */
31048 if (in_p && GET_CODE (x) == PLUS
31049 && SSE_CLASS_P (rclass)
31050 && SCALAR_INT_MODE_P (mode))
31051 return GENERAL_REGS;
31056 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
31059 ix86_class_likely_spilled_p (reg_class_t rclass)
31070 case SSE_FIRST_REG:
31072 case FP_SECOND_REG:
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, we provide an inline
   variant.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We
   avoid this by never combining those units in a single alternative
   in the machine description.  Ensure that this constraint holds to
   avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST,
   so do not enforce these sanity checks.  */
31096 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
31097 enum machine_mode mode, int strict)
31099 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
31100 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
31101 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
31102 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
31103 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
31104 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
31106 gcc_assert (!strict);
31110 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
31113 /* ??? This is a lie. We do have moves between mmx/general, and for
31114 mmx/sse2. But by saying we need secondary memory we discourage the
31115 register allocator from using the mmx registers unless needed. */
31116 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
31119 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
31121 /* SSE1 doesn't have any direct moves from other classes. */
31125 /* If the target says that inter-unit moves are more expensive
31126 than moving through memory, then don't generate them. */
31127 if (!TARGET_INTER_UNIT_MOVES)
31130 /* Between SSE and general, we have moves no larger than word size. */
31131 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
31139 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
31140 enum machine_mode mode, int strict)
31142 return inline_secondary_memory_needed (class1, class2, mode, strict);
31145 /* Implement the TARGET_CLASS_MAX_NREGS hook.
31147 On the 80386, this is the size of MODE in words,
31148 except in the FP regs, where a single reg is always enough. */
31150 static unsigned char
31151 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
31153 if (MAYBE_INTEGER_CLASS_P (rclass))
31155 if (mode == XFmode)
31156 return (TARGET_64BIT ? 2 : 3);
31157 else if (mode == XCmode)
31158 return (TARGET_64BIT ? 4 : 6);
31160 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
31164 if (COMPLEX_MODE_P (mode))
31171 /* Return true if the registers in CLASS cannot represent the change from
31172 modes FROM to TO. */
31175 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
31176 enum reg_class regclass)
31181 /* x87 registers can't do subreg at all, as all values are reformatted
31182 to extended precision. */
31183 if (MAYBE_FLOAT_CLASS_P (regclass))
31186 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
31188 /* Vector registers do not support QI or HImode loads. If we don't
31189 disallow a change to these modes, reload will assume it's ok to
31190 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
31191 the vec_dupv4hi pattern. */
31192 if (GET_MODE_SIZE (from) < 4)
31195 /* Vector registers do not support subreg with nonzero offsets, which
31196 are otherwise valid for integer registers. Since we can't see
31197 whether we have a nonzero offset from here, prohibit all
31198 nonparadoxical subregs changing size. */
31199 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return the maximum of the in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also the increased moving costs of QImode registers in non
   Q_REGS classes.  */
31222 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
31226 if (FLOAT_CLASS_P (regclass))
31244 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
31245 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
31247 if (SSE_CLASS_P (regclass))
31250 switch (GET_MODE_SIZE (mode))
31265 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
31266 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
31268 if (MMX_CLASS_P (regclass))
31271 switch (GET_MODE_SIZE (mode))
31283 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
31284 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
31286 switch (GET_MODE_SIZE (mode))
31289 if (Q_CLASS_P (regclass) || TARGET_64BIT)
31292 return ix86_cost->int_store[0];
31293 if (TARGET_PARTIAL_REG_DEPENDENCY
31294 && optimize_function_for_speed_p (cfun))
31295 cost = ix86_cost->movzbl_load;
31297 cost = ix86_cost->int_load[0];
31299 return MAX (cost, ix86_cost->int_store[0]);
31305 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
31307 return ix86_cost->movzbl_load;
31309 return ix86_cost->int_store[0] + 4;
31314 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
31315 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
31317 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
31318 if (mode == TFmode)
31321 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
31323 cost = ix86_cost->int_load[2];
31325 cost = ix86_cost->int_store[2];
31326 return (cost * (((int) GET_MODE_SIZE (mode)
31327 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
31332 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
31335 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
31339 /* Return the cost of moving data from a register in class CLASS1 to
31340 one in class CLASS2.
31342 It is not required that the cost always equal 2 when FROM is the same as TO;
31343 on some machines it is expensive to move between registers if they are not
31344 general registers. */
31347 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
31348 reg_class_t class2_i)
31350 enum reg_class class1 = (enum reg_class) class1_i;
31351 enum reg_class class2 = (enum reg_class) class2_i;
31353 /* In case we require secondary memory, compute cost of the store followed
31354 by load. In order to avoid bad register allocation choices, we need
31355 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
31357 if (inline_secondary_memory_needed (class1, class2, mode, 0))
31361 cost += inline_memory_move_cost (mode, class1, 2);
31362 cost += inline_memory_move_cost (mode, class2, 2);
/* In case of copying from a general purpose register we may emit multiple
   stores followed by a single load, causing a memory-size-mismatch stall.
   Count this as an arbitrarily high cost of 20.  */
31367 if (targetm.class_max_nregs (class1, mode)
31368 > targetm.class_max_nregs (class2, mode))
31371 /* In the case of FP/MMX moves, the registers actually overlap, and we
31372 have to switch modes in order to treat them differently. */
31373 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
31374 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
31380 /* Moves between SSE/MMX and integer unit are expensive. */
31381 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
31382 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
31384 /* ??? By keeping returned value relatively high, we limit the number
31385 of moves between integer and MMX/SSE registers for all targets.
31386 Additionally, high value prevents problem with x86_modes_tieable_p(),
31387 where integer modes in MMX/SSE registers are not tieable
31388 because of missing QImode and HImode moves to, from or between
31389 MMX/SSE registers. */
31390 return MAX (8, ix86_cost->mmxsse_to_integer);
31392 if (MAYBE_FLOAT_CLASS_P (class1))
31393 return ix86_cost->fp_move;
31394 if (MAYBE_SSE_CLASS_P (class1))
31395 return ix86_cost->sse_move;
31396 if (MAYBE_MMX_CLASS_P (class1))
31397 return ix86_cost->mmx_move;
31401 /* Return TRUE if hard register REGNO can hold a value of machine-mode
31405 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
/* Flags, and only flags, can hold CCmode values.  */
31408 if (CC_REGNO_P (regno))
31409 return GET_MODE_CLASS (mode) == MODE_CC;
31410 if (GET_MODE_CLASS (mode) == MODE_CC
31411 || GET_MODE_CLASS (mode) == MODE_RANDOM
31412 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
31414 if (FP_REGNO_P (regno))
31415 return VALID_FP_MODE_P (mode);
31416 if (SSE_REGNO_P (regno))
31418 /* We implement the move patterns for all vector modes into and
31419 out of SSE registers, even when no operation instructions
31420 are available. OImode move is available only when AVX is
31422 return ((TARGET_AVX && mode == OImode)
31423 || VALID_AVX256_REG_MODE (mode)
31424 || VALID_SSE_REG_MODE (mode)
31425 || VALID_SSE2_REG_MODE (mode)
31426 || VALID_MMX_REG_MODE (mode)
31427 || VALID_MMX_REG_MODE_3DNOW (mode));
31429 if (MMX_REGNO_P (regno))
31431 /* We implement the move patterns for 3DNOW modes even in MMX mode,
31432 so if the register is available at all, then we can move data of
31433 the given mode into or out of it. */
31434 return (VALID_MMX_REG_MODE (mode)
31435 || VALID_MMX_REG_MODE_3DNOW (mode));
31438 if (mode == QImode)
/* Take care with QImode values - they can be in non-QI regs,
   but then they do cause partial register stalls.  */
31442 if (regno <= BX_REG || TARGET_64BIT)
31444 if (!TARGET_PARTIAL_REG_STALL)
31446 return !can_create_pseudo_p ();
31448 /* We handle both integer and floats in the general purpose registers. */
31449 else if (VALID_INT_MODE_P (mode))
31451 else if (VALID_FP_MODE_P (mode))
31453 else if (VALID_DFP_MODE_P (mode))
31455 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
31456 on to use that value in smaller contexts, this can easily force a
31457 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
31458 supporting DImode, allow it. */
31459 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
31465 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
31466 tieable integer mode. */
31469 ix86_tieable_integer_mode_p (enum machine_mode mode)
31478 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
31481 return TARGET_64BIT;
31488 /* Return true if MODE1 is accessible in a register that can hold MODE2
31489 without copying. That is, all register classes that can hold MODE2
31490 can also hold MODE1. */
31493 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
31495 if (mode1 == mode2)
31498 if (ix86_tieable_integer_mode_p (mode1)
31499 && ix86_tieable_integer_mode_p (mode2))
31502 /* MODE2 being XFmode implies fp stack or general regs, which means we
31503 can tie any smaller floating point modes to it. Note that we do not
31504 tie this with TFmode. */
31505 if (mode2 == XFmode)
31506 return mode1 == SFmode || mode1 == DFmode;
31508 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
31509 that we can tie it with SFmode. */
31510 if (mode2 == DFmode)
31511 return mode1 == SFmode;
31513 /* If MODE2 is only appropriate for an SSE register, then tie with
31514 any other mode acceptable to SSE registers. */
31515 if (GET_MODE_SIZE (mode2) == 16
31516 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
31517 return (GET_MODE_SIZE (mode1) == 16
31518 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
31520 /* If MODE2 is appropriate for an MMX register, then tie
31521 with any other mode acceptable to MMX registers. */
31522 if (GET_MODE_SIZE (mode2) == 8
31523 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
31524 return (GET_MODE_SIZE (mode1) == 8
31525 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
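/* For instance, SImode and HImode are tieable whenever HImode is
   tieable at all (64-bit, or no partial-register stalls), so a
   (subreg:HI (reg:SI)) access needs no copy; XFmode ties only the
   smaller scalar float modes, never TFmode.  */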
31530 /* Compute a (partial) cost for rtx X. Return true if the complete
31531 cost has been computed, and false if subexpressions should be
31532 scanned. In either case, *TOTAL contains the cost result. */
31535 ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
31538 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
31539 enum machine_mode mode = GET_MODE (x);
31540 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
31548 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
31550 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
31552 else if (flag_pic && SYMBOLIC_CONST (x)
|| (GET_CODE (x) != LABEL_REF
31555 && (GET_CODE (x) != SYMBOL_REF
31556 || !SYMBOL_REF_LOCAL_P (x)))))
31563 if (mode == VOIDmode)
31566 switch (standard_80387_constant_p (x))
31571 default: /* Other constants */
31576 /* Start with (MEM (SYMBOL_REF)), since that's where
31577 it'll probably end up. Add a penalty for size. */
31578 *total = (COSTS_N_INSNS (1)
31579 + (flag_pic != 0 && !TARGET_64BIT)
31580 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
/* Zero extension is often completely free on x86_64, so make
   it as cheap as possible.  */
31588 if (TARGET_64BIT && mode == DImode
31589 && GET_MODE (XEXP (x, 0)) == SImode)
31591 else if (TARGET_ZERO_EXTEND_WITH_AND)
31592 *total = cost->add;
31594 *total = cost->movzx;
31598 *total = cost->movsx;
31602 if (CONST_INT_P (XEXP (x, 1))
31603 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
31605 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
31608 *total = cost->add;
31611 if ((value == 2 || value == 3)
31612 && cost->lea <= cost->shift_const)
31614 *total = cost->lea;
31624 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
31626 if (CONST_INT_P (XEXP (x, 1)))
31628 if (INTVAL (XEXP (x, 1)) > 32)
31629 *total = cost->shift_const + COSTS_N_INSNS (2);
31631 *total = cost->shift_const * 2;
31635 if (GET_CODE (XEXP (x, 1)) == AND)
31636 *total = cost->shift_var * 2;
31638 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
31643 if (CONST_INT_P (XEXP (x, 1)))
31644 *total = cost->shift_const;
31645 else if (GET_CODE (XEXP (x, 1)) == SUBREG
31646 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
31648 /* Return the cost after shift-and truncation. */
31649 *total = cost->shift_var;
31653 *total = cost->shift_var;
31661 gcc_assert (FLOAT_MODE_P (mode));
31662 gcc_assert (TARGET_FMA || TARGET_FMA4);
31664 /* ??? SSE scalar/vector cost should be used here. */
31665 /* ??? Bald assumption that fma has the same cost as fmul. */
31666 *total = cost->fmul;
31667 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
31669 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
31671 if (GET_CODE (sub) == NEG)
31672 sub = XEXP (sub, 0);
31673 *total += rtx_cost (sub, FMA, 0, speed);
31676 if (GET_CODE (sub) == NEG)
31677 sub = XEXP (sub, 0);
31678 *total += rtx_cost (sub, FMA, 2, speed);
31683 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31685 /* ??? SSE scalar cost should be used here. */
31686 *total = cost->fmul;
31689 else if (X87_FLOAT_MODE_P (mode))
31691 *total = cost->fmul;
31694 else if (FLOAT_MODE_P (mode))
31696 /* ??? SSE vector cost should be used here. */
31697 *total = cost->fmul;
31702 rtx op0 = XEXP (x, 0);
31703 rtx op1 = XEXP (x, 1);
31705 if (CONST_INT_P (XEXP (x, 1)))
31707 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
31708 for (nbits = 0; value != 0; value &= value - 1)
31712 /* This is arbitrary. */
31715 /* Compute costs correctly for widening multiplication. */
31716 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
31717 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
31718 == GET_MODE_SIZE (mode))
31720 int is_mulwiden = 0;
31721 enum machine_mode inner_mode = GET_MODE (op0);
31723 if (GET_CODE (op0) == GET_CODE (op1))
31724 is_mulwiden = 1, op1 = XEXP (op1, 0);
31725 else if (CONST_INT_P (op1))
31727 if (GET_CODE (op0) == SIGN_EXTEND)
31728 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
31731 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
31735 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
31738 *total = (cost->mult_init[MODE_INDEX (mode)]
31739 + nbits * cost->mult_bit
31740 + rtx_cost (op0, outer_code, opno, speed)
31741 + rtx_cost (op1, outer_code, opno, speed));
31750 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31751 /* ??? SSE cost should be used here. */
31752 *total = cost->fdiv;
31753 else if (X87_FLOAT_MODE_P (mode))
31754 *total = cost->fdiv;
31755 else if (FLOAT_MODE_P (mode))
31756 /* ??? SSE vector cost should be used here. */
31757 *total = cost->fdiv;
31759 *total = cost->divide[MODE_INDEX (mode)];
31763 if (GET_MODE_CLASS (mode) == MODE_INT
31764 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
31766 if (GET_CODE (XEXP (x, 0)) == PLUS
31767 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
31768 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
31769 && CONSTANT_P (XEXP (x, 1)))
31771 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
31772 if (val == 2 || val == 4 || val == 8)
31774 *total = cost->lea;
31775 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
31776 outer_code, opno, speed);
31777 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
31778 outer_code, opno, speed);
31779 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31783 else if (GET_CODE (XEXP (x, 0)) == MULT
31784 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
31786 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
31787 if (val == 2 || val == 4 || val == 8)
31789 *total = cost->lea;
31790 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
31791 outer_code, opno, speed);
31792 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31796 else if (GET_CODE (XEXP (x, 0)) == PLUS)
31798 *total = cost->lea;
31799 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
31800 outer_code, opno, speed);
31801 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
31802 outer_code, opno, speed);
31803 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31810 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31812 /* ??? SSE cost should be used here. */
31813 *total = cost->fadd;
31816 else if (X87_FLOAT_MODE_P (mode))
31818 *total = cost->fadd;
31821 else if (FLOAT_MODE_P (mode))
31823 /* ??? SSE vector cost should be used here. */
31824 *total = cost->fadd;
31832 if (!TARGET_64BIT && mode == DImode)
31834 *total = (cost->add * 2
31835 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
31836 << (GET_MODE (XEXP (x, 0)) != DImode))
31837 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
31838 << (GET_MODE (XEXP (x, 1)) != DImode)));
31844 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31846 /* ??? SSE cost should be used here. */
31847 *total = cost->fchs;
31850 else if (X87_FLOAT_MODE_P (mode))
31852 *total = cost->fchs;
31855 else if (FLOAT_MODE_P (mode))
31857 /* ??? SSE vector cost should be used here. */
31858 *total = cost->fchs;
31864 if (!TARGET_64BIT && mode == DImode)
31865 *total = cost->add * 2;
31867 *total = cost->add;
31871 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
31872 && XEXP (XEXP (x, 0), 1) == const1_rtx
31873 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
31874 && XEXP (x, 1) == const0_rtx)
31876 /* This kind of construct is implemented using test[bwl].
31877 Treat it as if we had an AND. */
31878 *total = (cost->add
31879 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
31880 + rtx_cost (const1_rtx, outer_code, opno, speed));
31886 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
31891 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31892 /* ??? SSE cost should be used here. */
31893 *total = cost->fabs;
31894 else if (X87_FLOAT_MODE_P (mode))
31895 *total = cost->fabs;
31896 else if (FLOAT_MODE_P (mode))
31897 /* ??? SSE vector cost should be used here. */
31898 *total = cost->fabs;
31902 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31903 /* ??? SSE cost should be used here. */
31904 *total = cost->fsqrt;
31905 else if (X87_FLOAT_MODE_P (mode))
31906 *total = cost->fsqrt;
31907 else if (FLOAT_MODE_P (mode))
31908 /* ??? SSE vector cost should be used here. */
31909 *total = cost->fsqrt;
31913 if (XINT (x, 1) == UNSPEC_TP)
31920 case VEC_DUPLICATE:
31921 /* ??? Assume all of these vector manipulation patterns are
31922 recognizable. In which case they all pretty much have the
31924 *total = COSTS_N_INSNS (1);
31934 static int current_machopic_label_num;
31936 /* Given a symbol name and its associated stub, write out the
31937 definition of the stub. */
31940 machopic_output_stub (FILE *file, const char *symb, const char *stub)
31942 unsigned int length;
31943 char *binder_name, *symbol_name, lazy_ptr_name[32];
31944 int label = ++current_machopic_label_num;
31946 /* For 64-bit we shouldn't get here. */
31947 gcc_assert (!TARGET_64BIT);
31949 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
31950 symb = targetm.strip_name_encoding (symb);
31952 length = strlen (stub);
31953 binder_name = XALLOCAVEC (char, length + 32);
31954 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
31956 length = strlen (symb);
31957 symbol_name = XALLOCAVEC (char, length + 32);
31958 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
31960 sprintf (lazy_ptr_name, "L%d$lz", label);
31962 if (MACHOPIC_ATT_STUB)
31963 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
31964 else if (MACHOPIC_PURE)
31965 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
31967 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
31969 fprintf (file, "%s:\n", stub);
31970 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31972 if (MACHOPIC_ATT_STUB)
31974 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
31976 else if (MACHOPIC_PURE)
31979 /* 25-byte PIC stub using "CALL get_pc_thunk". */
31980 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
31981 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
31982 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
31983 label, lazy_ptr_name, label);
31984 fprintf (file, "\tjmp\t*%%ecx\n");
31987 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
31989 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
31990 it needs no stub-binding-helper. */
31991 if (MACHOPIC_ATT_STUB)
31994 fprintf (file, "%s:\n", binder_name);
31998 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
31999 fprintf (file, "\tpushl\t%%ecx\n");
32002 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
32004 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
32006 /* N.B. Keep the correspondence of these
32007 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
32008 old-pic/new-pic/non-pic stubs; altering this will break
32009 compatibility with existing dylibs. */
32012 /* 25-byte PIC stub using "CALL get_pc_thunk". */
32013 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
32016 /* 16-byte -mdynamic-no-pic stub. */
32017 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
32019 fprintf (file, "%s:\n", lazy_ptr_name);
32020 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
32021 fprintf (file, ASM_LONG "%s\n", binder_name);
32023 #endif /* TARGET_MACHO */
32025 /* Order the registers for register allocator. */
32028 x86_order_regs_for_local_alloc (void)
32033 /* First allocate the local general purpose registers. */
32034 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
32035 if (GENERAL_REGNO_P (i) && call_used_regs[i])
32036 reg_alloc_order [pos++] = i;
32038 /* Global general purpose registers. */
32039 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
32040 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
32041 reg_alloc_order [pos++] = i;
/* x87 registers come first in case we are doing FP math
   using them.  */
32045 if (!TARGET_SSE_MATH)
32046 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
32047 reg_alloc_order [pos++] = i;
32049 /* SSE registers. */
32050 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
32051 reg_alloc_order [pos++] = i;
32052 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
32053 reg_alloc_order [pos++] = i;
32055 /* x87 registers. */
32056 if (TARGET_SSE_MATH)
32057 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
32058 reg_alloc_order [pos++] = i;
32060 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
32061 reg_alloc_order [pos++] = i;
/* Initialize the rest of the array, as we do not allocate some registers
   at all.  */
32065 while (pos < FIRST_PSEUDO_REGISTER)
32066 reg_alloc_order [pos++] = 0;
32069 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
32070 in struct attribute_spec handler. */
32072 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
32074 int flags ATTRIBUTE_UNUSED,
32075 bool *no_add_attrs)
32077 if (TREE_CODE (*node) != FUNCTION_TYPE
32078 && TREE_CODE (*node) != METHOD_TYPE
32079 && TREE_CODE (*node) != FIELD_DECL
32080 && TREE_CODE (*node) != TYPE_DECL)
32082 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32084 *no_add_attrs = true;
32089 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
32091 *no_add_attrs = true;
32094 if (is_attribute_p ("callee_pop_aggregate_return", name))
32098 cst = TREE_VALUE (args);
32099 if (TREE_CODE (cst) != INTEGER_CST)
32101 warning (OPT_Wattributes,
32102 "%qE attribute requires an integer constant argument",
32104 *no_add_attrs = true;
32106 else if (compare_tree_int (cst, 0) != 0
32107 && compare_tree_int (cst, 1) != 0)
32109 warning (OPT_Wattributes,
32110 "argument to %qE attribute is neither zero, nor one",
32112 *no_add_attrs = true;
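/* Usage sketch for the attribute handled above (32-bit only); the
   0-or-1 argument selects whether the callee pops the hidden
   aggregate-return pointer:

     struct big { int v[4]; };
     struct big __attribute__ ((callee_pop_aggregate_return (1)))
     make_big (void);  */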
32121 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
32122 struct attribute_spec.handler. */
32124 ix86_handle_abi_attribute (tree *node, tree name,
32125 tree args ATTRIBUTE_UNUSED,
32126 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32128 if (TREE_CODE (*node) != FUNCTION_TYPE
32129 && TREE_CODE (*node) != METHOD_TYPE
32130 && TREE_CODE (*node) != FIELD_DECL
32131 && TREE_CODE (*node) != TYPE_DECL)
32133 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32135 *no_add_attrs = true;
32139 /* Can combine regparm with all attributes but fastcall. */
32140 if (is_attribute_p ("ms_abi", name))
32142 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
32144 error ("ms_abi and sysv_abi attributes are not compatible");
32149 else if (is_attribute_p ("sysv_abi", name))
32151 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
32153 error ("ms_abi and sysv_abi attributes are not compatible");
32162 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32163 struct attribute_spec.handler. */
32165 ix86_handle_struct_attribute (tree *node, tree name,
32166 tree args ATTRIBUTE_UNUSED,
32167 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32170 if (DECL_P (*node))
32172 if (TREE_CODE (*node) == TYPE_DECL)
32173 type = &TREE_TYPE (*node);
32178 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
32180 warning (OPT_Wattributes, "%qE attribute ignored",
32182 *no_add_attrs = true;
32185 else if ((is_attribute_p ("ms_struct", name)
32186 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
32187 || ((is_attribute_p ("gcc_struct", name)
32188 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
32190 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
32192 *no_add_attrs = true;
32199 ix86_handle_fndecl_attribute (tree *node, tree name,
32200 tree args ATTRIBUTE_UNUSED,
32201 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32203 if (TREE_CODE (*node) != FUNCTION_DECL)
32205 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32207 *no_add_attrs = true;
32213 ix86_ms_bitfield_layout_p (const_tree record_type)
32215 return ((TARGET_MS_BITFIELD_LAYOUT
32216 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
32217 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
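/* Usage sketch: with ms_struct layout a bit-field does not share
   storage with a preceding field of a different type, so in

     struct __attribute__ ((ms_struct)) S { char c; int i : 8; };

   the bit-field starts a fresh int-aligned unit, typically making
   sizeof (struct S) 8 where the default gcc_struct layout gives 4.  */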
32220 /* Returns an expression indicating where the this parameter is
32221 located on entry to the FUNCTION. */
32224 x86_this_parameter (tree function)
32226 tree type = TREE_TYPE (function);
32227 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
32232 const int *parm_regs;
32234 if (ix86_function_type_abi (type) == MS_ABI)
32235 parm_regs = x86_64_ms_abi_int_parameter_registers;
32237 parm_regs = x86_64_int_parameter_registers;
32238 return gen_rtx_REG (DImode, parm_regs[aggr]);
32241 nregs = ix86_function_regparm (type, function);
32243 if (nregs > 0 && !stdarg_p (type))
32246 unsigned int ccvt = ix86_get_callcvt (type);
32248 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
32249 regno = aggr ? DX_REG : CX_REG;
32250 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
32254 return gen_rtx_MEM (SImode,
32255 plus_constant (stack_pointer_rtx, 4));
32264 return gen_rtx_MEM (SImode,
32265 plus_constant (stack_pointer_rtx, 4));
32268 return gen_rtx_REG (SImode, regno);
32271 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
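/* Summary of the cases above: on 64-bit, `this' arrives in the first
   integer parameter register (the second one when a hidden aggregate
   return pointer comes first); for 32-bit fastcall it is %ecx (%edx
   with an aggregate return); otherwise it sits on the stack just
   above the return address.  */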
32274 /* Determine whether x86_output_mi_thunk can succeed. */
32277 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
32278 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
32279 HOST_WIDE_INT vcall_offset, const_tree function)
32281 /* 64-bit can handle anything. */
32285 /* For 32-bit, everything's fine if we have one free register. */
32286 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
32289 /* Need a free register for vcall_offset. */
32293 /* Need a free register for GOT references. */
32294 if (flag_pic && !targetm.binds_local_p (function))
32297 /* Otherwise ok. */
32301 /* Output the assembler code for a thunk function. THUNK_DECL is the
32302 declaration for the thunk function itself, FUNCTION is the decl for
32303 the target function. DELTA is an immediate constant offset to be
32304 added to THIS. If VCALL_OFFSET is nonzero, the word at
32305 *(*this + vcall_offset) should be added to THIS. */
32308 x86_output_mi_thunk (FILE *file,
32309 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
32310 HOST_WIDE_INT vcall_offset, tree function)
32312 rtx this_param = x86_this_parameter (function);
32313 rtx this_reg, tmp, fnaddr;
32314 unsigned int tmp_regno;
32317 tmp_regno = R10_REG;
32320 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
32321 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
32322 tmp_regno = AX_REG;
32323 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
32324 tmp_regno = DX_REG;
32326 tmp_regno = CX_REG;
32329 emit_note (NOTE_INSN_PROLOGUE_END);
32331 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
32332 pull it in now and let DELTA benefit. */
32333 if (REG_P (this_param))
32334 this_reg = this_param;
32335 else if (vcall_offset)
32337 /* Put the this parameter into %eax. */
32338 this_reg = gen_rtx_REG (Pmode, AX_REG);
32339 emit_move_insn (this_reg, this_param);
32342 this_reg = NULL_RTX;
32344 /* Adjust the this parameter by a fixed constant. */
32347 rtx delta_rtx = GEN_INT (delta);
32348 rtx delta_dst = this_reg ? this_reg : this_param;
32352 if (!x86_64_general_operand (delta_rtx, Pmode))
32354 tmp = gen_rtx_REG (Pmode, tmp_regno);
32355 emit_move_insn (tmp, delta_rtx);
32360 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
32363 /* Adjust the this parameter by a value stored in the vtable. */
32366 rtx vcall_addr, vcall_mem, this_mem;
32368 tmp = gen_rtx_REG (Pmode, tmp_regno);
32370 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
32371 if (Pmode != ptr_mode)
32372 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
32373 emit_move_insn (tmp, this_mem);
32375 /* Adjust the this parameter. */
32376 vcall_addr = plus_constant (tmp, vcall_offset);
32378 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
32380 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
32381 emit_move_insn (tmp2, GEN_INT (vcall_offset));
32382 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
32385 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
32386 if (Pmode != ptr_mode)
32387 emit_insn (gen_addsi_1_zext (this_reg,
32388 gen_rtx_REG (ptr_mode,
32392 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
32395 /* If necessary, drop THIS back to its stack slot. */
32396 if (this_reg && this_reg != this_param)
32397 emit_move_insn (this_param, this_reg);
32399 fnaddr = XEXP (DECL_RTL (function), 0);
32402 if (!flag_pic || targetm.binds_local_p (function)
32403 || cfun->machine->call_abi == MS_ABI)
32407 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
32408 tmp = gen_rtx_CONST (Pmode, tmp);
32409 fnaddr = gen_rtx_MEM (Pmode, tmp);
32414 if (!flag_pic || targetm.binds_local_p (function))
32417 else if (TARGET_MACHO)
32419 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
32420 fnaddr = XEXP (fnaddr, 0);
32422 #endif /* TARGET_MACHO */
32425 tmp = gen_rtx_REG (Pmode, CX_REG);
32426 output_set_got (tmp, NULL_RTX);
32428 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
32429 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
32430 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
32434 /* Our sibling call patterns do not allow memories, because we have no
32435 predicate that can distinguish between frame and non-frame memory.
32436 For our purposes here, we can get away with (ab)using a jump pattern,
32437 because we're going to do no optimization. */
32438 if (MEM_P (fnaddr))
32439 emit_jump_insn (gen_indirect_jump (fnaddr));
32442 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
32443 fnaddr = legitimize_pic_address (fnaddr,
32444 gen_rtx_REG (Pmode, tmp_regno));
32446 if (!sibcall_insn_operand (fnaddr, Pmode))
32448 tmp = gen_rtx_REG (Pmode, tmp_regno);
32449 if (GET_MODE (fnaddr) != Pmode)
32450 fnaddr = gen_rtx_ZERO_EXTEND (Pmode, fnaddr);
32451 emit_move_insn (tmp, fnaddr);
32455 tmp = gen_rtx_MEM (QImode, fnaddr);
32456 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
32457 tmp = emit_call_insn (tmp);
32458 SIBLING_CALL_P (tmp) = 1;
32462 /* Emit just enough of rest_of_compilation to get the insns emitted.
32463 Note that use_thunk calls assemble_start_function et al. */
32464 tmp = get_insns ();
32465 insn_locators_alloc ();
32466 shorten_branches (tmp);
32467 final_start_function (tmp, file, 1);
32468 final (tmp, file, 1);
32469 final_end_function ();
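/* For a simple thunk (say DELTA == -8, no VCALL_OFFSET, 32-bit,
   `this' on the stack) the code emitted above boils down to:

	addl	$-8, 4(%esp)
	jmp	function

   while the vcall case additionally loads *this and adds the vtable
   entry at VCALL_OFFSET through the scratch register chosen above.  */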
32473 x86_file_start (void)
32475 default_file_start ();
32477 darwin_file_start ();
32479 if (X86_FILE_START_VERSION_DIRECTIVE)
32480 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
32481 if (X86_FILE_START_FLTUSED)
32482 fputs ("\t.global\t__fltused\n", asm_out_file);
32483 if (ix86_asm_dialect == ASM_INTEL)
32484 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
32488 x86_field_alignment (tree field, int computed)
32490 enum machine_mode mode;
32491 tree type = TREE_TYPE (field);
32493 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
32495 mode = TYPE_MODE (strip_array_types (type));
32496 if (mode == DFmode || mode == DCmode
32497 || GET_MODE_CLASS (mode) == MODE_INT
32498 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
32499 return MIN (32, computed);
32503 /* Output assembler code to FILE to increment profiler label # LABELNO
32504 for profiling a function entry. */
32506 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
32508 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
32513 #ifndef NO_PROFILE_COUNTERS
32514 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
32517 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
32518 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
32520 fprintf (file, "\tcall\t%s\n", mcount_name);
32524 #ifndef NO_PROFILE_COUNTERS
32525 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
32528 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
32532 #ifndef NO_PROFILE_COUNTERS
32533 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
32536 fprintf (file, "\tcall\t%s\n", mcount_name);
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1-byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */
32546 min_insn_size (rtx insn)
32550 if (!INSN_P (insn) || !active_insn_p (insn))
/* Discard alignments we've emitted, and jump table data.  */
32554 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
32555 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
32557 if (JUMP_TABLE_DATA_P (insn))
/* Important case - calls are always 5 bytes.
   It is common to have many calls in a row.  */
32563 && symbolic_reference_mentioned_p (PATTERN (insn))
32564 && !SIBLING_CALL_P (insn))
32566 len = get_attr_length (insn);
32570 /* For normal instructions we rely on get_attr_length being exact,
32571 with a few exceptions. */
32572 if (!JUMP_P (insn))
32574 enum attr_type type = get_attr_type (insn);
32579 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
32580 || asm_noperands (PATTERN (insn)) >= 0)
32587 /* Otherwise trust get_attr_length. */
32591 l = get_attr_length_address (insn);
32592 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
32601 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
   16-byte window.  */
32607 ix86_avoid_jump_mispredicts (void)
32609 rtx insn, start = get_insns ();
32610 int nbytes = 0, njumps = 0;
32613 /* Look for all minimal intervals of instructions containing 4 jumps.
32614 The intervals are bounded by START and INSN. NBYTES is the total
32615 size of instructions in the interval including INSN and not including
32616 START. When the NBYTES is smaller than 16 bytes, it is possible
32617 that the end of START and INSN ends up in the same 16byte page.
32619 The smallest offset in the page INSN can start is the case where START
32620 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
32621 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
32623 for (insn = start; insn; insn = NEXT_INSN (insn))
32627 if (LABEL_P (insn))
32629 int align = label_to_alignment (insn);
32630 int max_skip = label_to_max_skip (insn);
32634 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
32635 already in the current 16 byte page, because otherwise
32636 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
32637 bytes to reach 16 byte boundary. */
32639 || (align <= 3 && max_skip != (1 << align) - 1))
32642 fprintf (dump_file, "Label %i with max_skip %i\n",
32643 INSN_UID (insn), max_skip);
32646 while (nbytes + max_skip >= 16)
32648 start = NEXT_INSN (start);
32649 if ((JUMP_P (start)
32650 && GET_CODE (PATTERN (start)) != ADDR_VEC
32651 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
32653 njumps--, isjump = 1;
32656 nbytes -= min_insn_size (start);
32662 min_size = min_insn_size (insn);
32663 nbytes += min_size;
32665 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
32666 INSN_UID (insn), min_size);
32668 && GET_CODE (PATTERN (insn)) != ADDR_VEC
32669 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
32677 start = NEXT_INSN (start);
32678 if ((JUMP_P (start)
32679 && GET_CODE (PATTERN (start)) != ADDR_VEC
32680 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
32682 njumps--, isjump = 1;
32685 nbytes -= min_insn_size (start);
32687 gcc_assert (njumps >= 0);
32689 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
32690 INSN_UID (start), INSN_UID (insn), nbytes);
32692 if (njumps == 3 && isjump && nbytes < 16)
32694 int padsize = 15 - nbytes + min_insn_size (insn);
32697 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
32698 INSN_UID (insn), padsize);
32699 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
/* AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly
   preceded by another jump instruction.  We avoid the penalty by
   inserting a NOP just before the RET instructions in such cases.  */
32710 ix86_pad_returns (void)
32715 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
32717 basic_block bb = e->src;
32718 rtx ret = BB_END (bb);
32720 bool replace = false;
32722 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
32723 || optimize_bb_for_size_p (bb))
32725 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
32726 if (active_insn_p (prev) || LABEL_P (prev))
32728 if (prev && LABEL_P (prev))
32733 FOR_EACH_EDGE (e, ei, bb->preds)
32734 if (EDGE_FREQUENCY (e) && e->src->index >= 0
32735 && !(e->flags & EDGE_FALLTHRU))
32740 prev = prev_active_insn (ret);
32742 && ((JUMP_P (prev) && any_condjump_p (prev))
/* Empty functions get a branch mispredict even when
   the jump destination is not visible to us.  */
32747 if (!prev && !optimize_function_for_size_p (cfun))
32752 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
32758 /* Count the minimum number of instructions in BB. Return 4 if the
32759 number of instructions >= 4. */
32762 ix86_count_insn_bb (basic_block bb)
32765 int insn_count = 0;
32767 /* Count number of instructions in this block. Return 4 if the number
32768 of instructions >= 4. */
32769 FOR_BB_INSNS (bb, insn)
/* This only happens in exit blocks.  */
32773 && ANY_RETURN_P (PATTERN (insn)))
32776 if (NONDEBUG_INSN_P (insn)
32777 && GET_CODE (PATTERN (insn)) != USE
32778 && GET_CODE (PATTERN (insn)) != CLOBBER)
32781 if (insn_count >= 4)
32790 /* Count the minimum number of instructions in code path in BB.
32791 Return 4 if the number of instructions >= 4. */
32794 ix86_count_insn (basic_block bb)
32798 int min_prev_count;
32800 /* Only bother counting instructions along paths with no
32801 more than 2 basic blocks between entry and exit. Given
32802 that BB has an edge to exit, determine if a predecessor
32803 of BB has an edge from entry. If so, compute the number
32804 of instructions in the predecessor block. If there
32805 happen to be multiple such blocks, compute the minimum. */
32806 min_prev_count = 4;
32807 FOR_EACH_EDGE (e, ei, bb->preds)
32810 edge_iterator prev_ei;
32812 if (e->src == ENTRY_BLOCK_PTR)
32814 min_prev_count = 0;
32817 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
32819 if (prev_e->src == ENTRY_BLOCK_PTR)
32821 int count = ix86_count_insn_bb (e->src);
32822 if (count < min_prev_count)
32823 min_prev_count = count;
32829 if (min_prev_count < 4)
32830 min_prev_count += ix86_count_insn_bb (bb);
32832 return min_prev_count;
/* Pad short functions to 4 instructions.  */
32838 ix86_pad_short_function (void)
32843 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
32845 rtx ret = BB_END (e->src);
32846 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
32848 int insn_count = ix86_count_insn (e->src);
32850 /* Pad short function. */
32851 if (insn_count < 4)
/* Find the epilogue.  */
insn = ret;
while (insn && (!NOTE_P (insn)
		|| NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
32859 insn = PREV_INSN (insn);
32864 /* Two NOPs count as one instruction. */
32865 insn_count = 2 * (4 - insn_count);
32866 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
32872 /* Implement machine specific optimizations. We implement padding of returns
32873 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
32877 /* We are freeing block_for_insn in the toplev to keep compatibility
32878 with old MDEP_REORGS that are not CFG based. Recompute it now. */
32879 compute_bb_for_insn ();
32881 /* Run the vzeroupper optimization if needed. */
32882 if (TARGET_VZEROUPPER)
32883 move_or_delete_vzeroupper ();
32885 if (optimize && optimize_function_for_speed_p (cfun))
32887 if (TARGET_PAD_SHORT_FUNCTION)
32888 ix86_pad_short_function ();
32889 else if (TARGET_PAD_RETURNS)
32890 ix86_pad_returns ();
32891 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
32892 if (TARGET_FOUR_JUMP_LIMIT)
32893 ix86_avoid_jump_mispredicts ();
/* Return nonzero when a QImode register that must be represented via a
   REX prefix is used.  */
32901 x86_extended_QIreg_mentioned_p (rtx insn)
32904 extract_insn_cached (insn);
32905 for (i = 0; i < recog_data.n_operands; i++)
32906 if (REG_P (recog_data.operand[i])
32907 && REGNO (recog_data.operand[i]) > BX_REG)
/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
32915 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
32917 unsigned int regno;
32920 regno = REGNO (*p);
32921 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
32924 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
32927 x86_extended_reg_mentioned_p (rtx insn)
32929 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
32930 extended_reg_mentioned_1, NULL);
32933 /* If profitable, negate (without causing overflow) integer constant
32934 of mode MODE at location LOC. Return true in this case. */
32936 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
32940 if (!CONST_INT_P (*loc))
32946 /* DImode x86_64 constants must fit in 32 bits. */
32947 gcc_assert (x86_64_immediate_operand (*loc, mode));
32958 gcc_unreachable ();
32961 /* Avoid overflows: negating the most negative value of MODE would overflow. */
32962 if (mode_signbit_p (mode, *loc))
32965 val = INTVAL (*loc);
32967 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
32968 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
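/* An 8-bit immediate is sign-extended, so -128 is encodable in one
   byte while +128 needs a full 32-bit immediate; hence -128 is kept
   as-is and +128 is replaced by -128 with the opposite operation.  */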
32969 if ((val < 0 && val != -128)
32972 *loc = GEN_INT (-val);
32979 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
32980 optabs would emit if we didn't have TFmode patterns. */
32983 x86_emit_floatuns (rtx operands[2])
32985 rtx neglab, donelab, i0, i1, f0, in, out;
32986 enum machine_mode mode, inmode;
32988 inmode = GET_MODE (operands[1]);
32989 gcc_assert (inmode == SImode || inmode == DImode);
32992 in = force_reg (inmode, operands[1]);
32993 mode = GET_MODE (out);
32994 neglab = gen_label_rtx ();
32995 donelab = gen_label_rtx ();
32996 f0 = gen_reg_rtx (mode);
32998 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
33000 expand_float (out, in, 0);
33002 emit_jump_insn (gen_jump (donelab));
33005 emit_label (neglab);
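/* The input has its most significant bit set.  Halve it, folding the
   discarded low bit back in (round to odd) so the final doubling
   still rounds correctly, convert the halved value, then double.  */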
33007 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
33009 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
33011 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
33013 expand_float (f0, i0, 0);
33015 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
33017 emit_label (donelab);
33020 /* AVX2 supports 32-byte integer vector operations, so the longest
33021 vector we are faced with is V32QImode. */
33022 #define MAX_VECT_LEN 32
33024 struct expand_vec_perm_d
33026 rtx target, op0, op1;
33027 unsigned char perm[MAX_VECT_LEN];
33028 enum machine_mode vmode;
33029 unsigned char nelt;
33033 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
33034 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
33036 /* Get a vector mode of the same size as the original but with elements
33037 twice as wide. This is only guaranteed to apply to integral vectors. */
33039 static inline enum machine_mode
33040 get_mode_wider_vector (enum machine_mode o)
33042 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
33043 enum machine_mode n = GET_MODE_WIDER_MODE (o);
33044 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
33045 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
33049 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33050 with all elements equal to VAR. Return true if successful. */
33053 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
33054 rtx target, rtx val)
33077 /* First attempt to recognize VAL as-is. */
33078 dup = gen_rtx_VEC_DUPLICATE (mode, val);
33079 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
33080 if (recog_memoized (insn) < 0)
33083 /* If that fails, force VAL into a register. */
33086 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
33087 seq = get_insns ();
33090 emit_insn_before (seq, insn);
33092 ok = recog_memoized (insn) >= 0;
33101 if (TARGET_SSE || TARGET_3DNOW_A)
33105 val = gen_lowpart (SImode, val);
33106 x = gen_rtx_TRUNCATE (HImode, val);
33107 x = gen_rtx_VEC_DUPLICATE (mode, x);
33108 emit_insn (gen_rtx_SET (VOIDmode, target, x));
33121 struct expand_vec_perm_d dperm;
33125 memset (&dperm, 0, sizeof (dperm));
33126 dperm.target = target;
33127 dperm.vmode = mode;
33128 dperm.nelt = GET_MODE_NUNITS (mode);
33129 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
33131 /* Extend to SImode using a paradoxical SUBREG. */
33132 tmp1 = gen_reg_rtx (SImode);
33133 emit_move_insn (tmp1, gen_lowpart (SImode, val));
33135 /* Insert the SImode value as low element of a V4SImode vector. */
33136 tmp2 = gen_lowpart (V4SImode, dperm.op0);
33137 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
33139 ok = (expand_vec_perm_1 (&dperm)
33140 || expand_vec_perm_broadcast_1 (&dperm));
33152 /* Replicate the value once into the next wider mode and recurse. */
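/* E.g., a V8HImode broadcast of V becomes a V4SImode broadcast
   of (V << 16) | V.  */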
33154 enum machine_mode smode, wsmode, wvmode;
33157 smode = GET_MODE_INNER (mode);
33158 wvmode = get_mode_wider_vector (mode);
33159 wsmode = GET_MODE_INNER (wvmode);
33161 val = convert_modes (wsmode, smode, val, true);
33162 x = expand_simple_binop (wsmode, ASHIFT, val,
33163 GEN_INT (GET_MODE_BITSIZE (smode)),
33164 NULL_RTX, 1, OPTAB_LIB_WIDEN);
33165 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
33167 x = gen_lowpart (wvmode, target);
33168 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
33176 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
33177 rtx x = gen_reg_rtx (hvmode);
33179 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
33182 x = gen_rtx_VEC_CONCAT (mode, x, x);
33183 emit_insn (gen_rtx_SET (VOIDmode, target, x));
33192 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33193 whose ONE_VAR element is VAR, and other elements are zero. Return true
33197 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
33198 rtx target, rtx var, int one_var)
33200 enum machine_mode vsimode;
33203 bool use_vector_set = false;
33208 /* For SSE4.1, we normally use vector set. But if the second
33209 element is zero and inter-unit moves are OK, we use movq
33211 use_vector_set = (TARGET_64BIT
33213 && !(TARGET_INTER_UNIT_MOVES
33219 use_vector_set = TARGET_SSE4_1;
33222 use_vector_set = TARGET_SSE2;
33225 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
33232 use_vector_set = TARGET_AVX;
33235 /* Use ix86_expand_vector_set in 64bit mode only. */
33236 use_vector_set = TARGET_AVX && TARGET_64BIT;
33242 if (use_vector_set)
33244 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
33245 var = force_reg (GET_MODE_INNER (mode), var);
33246 ix86_expand_vector_set (mmx_ok, target, var, one_var);
33262 var = force_reg (GET_MODE_INNER (mode), var);
33263 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
33264 emit_insn (gen_rtx_SET (VOIDmode, target, x));
33269 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
33270 new_target = gen_reg_rtx (mode);
33272 new_target = target;
33273 var = force_reg (GET_MODE_INNER (mode), var);
33274 x = gen_rtx_VEC_DUPLICATE (mode, var);
33275 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
33276 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
33279 /* We need to shuffle the value to the correct position, so
33280 create a new pseudo to store the intermediate result. */
33282 /* With SSE2, we can use the integer shuffle insns. */
33283 if (mode != V4SFmode && TARGET_SSE2)
33285 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
33287 GEN_INT (one_var == 1 ? 0 : 1),
33288 GEN_INT (one_var == 2 ? 0 : 1),
33289 GEN_INT (one_var == 3 ? 0 : 1)));
33290 if (target != new_target)
33291 emit_move_insn (target, new_target);
33295 /* Otherwise convert the intermediate result to V4SFmode and
33296 use the SSE1 shuffle instructions. */
33297 if (mode != V4SFmode)
33299 tmp = gen_reg_rtx (V4SFmode);
33300 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
33305 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
33307 GEN_INT (one_var == 1 ? 0 : 1),
33308 GEN_INT (one_var == 2 ? 0+4 : 1+4),
33309 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
33311 if (mode != V4SFmode)
33312 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
33313 else if (tmp != target)
33314 emit_move_insn (target, tmp);
33316 else if (target != new_target)
33317 emit_move_insn (target, new_target);
33322 vsimode = V4SImode;
33328 vsimode = V2SImode;
33334 /* Zero extend the variable element to SImode and recurse. */
33335 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
33337 x = gen_reg_rtx (vsimode);
33338 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
33340 gcc_unreachable ();
33342 emit_move_insn (target, gen_lowpart (mode, x));
33350 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33351 consisting of the values in VALS. It is known that all elements
33352 except ONE_VAR are constants. Return true if successful. */
33355 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
33356 rtx target, rtx vals, int one_var)
33358 rtx var = XVECEXP (vals, 0, one_var);
33359 enum machine_mode wmode;
33362 const_vec = copy_rtx (vals);
33363 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
33364 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
33372 /* For the two element vectors, it's just as easy to use
33373 the general case. */
33377 /* Use ix86_expand_vector_set in 64bit mode only. */
33400 /* There's no way to set one QImode entry easily. Combine
33401 the variable value with its adjacent constant value, and
33402 promote to an HImode set. */
33403 x = XVECEXP (vals, 0, one_var ^ 1);
33406 var = convert_modes (HImode, QImode, var, true);
33407 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
33408 NULL_RTX, 1, OPTAB_LIB_WIDEN);
33409 x = GEN_INT (INTVAL (x) & 0xff);
33413 var = convert_modes (HImode, QImode, var, true);
33414 x = gen_int_mode (INTVAL (x) << 8, HImode);
33416 if (x != const0_rtx)
33417 var = expand_simple_binop (HImode, IOR, var, x, var,
33418 1, OPTAB_LIB_WIDEN);
33420 x = gen_reg_rtx (wmode);
33421 emit_move_insn (x, gen_lowpart (wmode, const_vec));
33422 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
33424 emit_move_insn (target, gen_lowpart (mode, x));
33431 emit_move_insn (target, const_vec);
33432 ix86_expand_vector_set (mmx_ok, target, var, one_var);
33436 /* A subroutine of ix86_expand_vector_init_general. Use vector
33437 concatenate to handle the most general case: all values variable,
33438 and none identical. */
33441 ix86_expand_vector_init_concat (enum machine_mode mode,
33442 rtx target, rtx *ops, int n)
33444 enum machine_mode cmode, hmode = VOIDmode;
33445 rtx first[8], second[4];
33485 gcc_unreachable ();
33488 if (!register_operand (ops[1], cmode))
33489 ops[1] = force_reg (cmode, ops[1]);
33490 if (!register_operand (ops[0], cmode))
33491 ops[0] = force_reg (cmode, ops[0]);
33492 emit_insn (gen_rtx_SET (VOIDmode, target,
33493 gen_rtx_VEC_CONCAT (mode, ops[0],
33513 gcc_unreachable ();
33529 gcc_unreachable ();
33534 /* FIXME: We process inputs backward to help RA. PR 36222. */
33537 for (; i > 0; i -= 2, j--)
33539 first[j] = gen_reg_rtx (cmode);
33540 v = gen_rtvec (2, ops[i - 1], ops[i]);
33541 ix86_expand_vector_init (false, first[j],
33542 gen_rtx_PARALLEL (cmode, v));
33548 gcc_assert (hmode != VOIDmode);
33549 for (i = j = 0; i < n; i += 2, j++)
33551 second[j] = gen_reg_rtx (hmode);
33552 ix86_expand_vector_init_concat (hmode, second [j],
33556 ix86_expand_vector_init_concat (mode, target, second, n);
33559 ix86_expand_vector_init_concat (mode, target, first, n);
33563 gcc_unreachable ();
33567 /* A subroutine of ix86_expand_vector_init_general. Use vector
33568 interleave to handle the most general case: all values variable,
33569 and none identical. */
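/* E.g., for V8HImode the eight values are loaded pairwise into four
   vectors (elements 0 and 1 of each), which are interleaved into two
   V4SImode vectors and finally into a single V2DImode vector that is
   recast to the original mode.  */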
33572 ix86_expand_vector_init_interleave (enum machine_mode mode,
33573 rtx target, rtx *ops, int n)
33575 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
33578 rtx (*gen_load_even) (rtx, rtx, rtx);
33579 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
33580 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
33585 gen_load_even = gen_vec_setv8hi;
33586 gen_interleave_first_low = gen_vec_interleave_lowv4si;
33587 gen_interleave_second_low = gen_vec_interleave_lowv2di;
33588 inner_mode = HImode;
33589 first_imode = V4SImode;
33590 second_imode = V2DImode;
33591 third_imode = VOIDmode;
33594 gen_load_even = gen_vec_setv16qi;
33595 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
33596 gen_interleave_second_low = gen_vec_interleave_lowv4si;
33597 inner_mode = QImode;
33598 first_imode = V8HImode;
33599 second_imode = V4SImode;
33600 third_imode = V2DImode;
33603 gcc_unreachable ();
33606 for (i = 0; i < n; i++)
33608 /* Extend the odd element to SImode using a paradoxical SUBREG. */
33609 op0 = gen_reg_rtx (SImode);
33610 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
33612 /* Insert the SImode value as low element of V4SImode vector. */
33613 op1 = gen_reg_rtx (V4SImode);
33614 op0 = gen_rtx_VEC_MERGE (V4SImode,
33615 gen_rtx_VEC_DUPLICATE (V4SImode,
33617 CONST0_RTX (V4SImode),
33619 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
33621 /* Cast the V4SImode vector back to a vector in the original mode. */
33622 op0 = gen_reg_rtx (mode);
33623 emit_move_insn (op0, gen_lowpart (mode, op1));
33625 /* Load even elements into the second position. */
33626 emit_insn (gen_load_even (op0,
33627 force_reg (inner_mode,
33631 /* Cast vector to FIRST_IMODE vector. */
33632 ops[i] = gen_reg_rtx (first_imode);
33633 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
33636 /* Interleave low FIRST_IMODE vectors. */
33637 for (i = j = 0; i < n; i += 2, j++)
33639 op0 = gen_reg_rtx (first_imode);
33640 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
33642 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
33643 ops[j] = gen_reg_rtx (second_imode);
33644 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
33647 /* Interleave low SECOND_IMODE vectors. */
33648 switch (second_imode)
33651 for (i = j = 0; i < n / 2; i += 2, j++)
33653 op0 = gen_reg_rtx (second_imode);
33654 emit_insn (gen_interleave_second_low (op0, ops[i],
33657 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
33659 ops[j] = gen_reg_rtx (third_imode);
33660 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
33662 second_imode = V2DImode;
33663 gen_interleave_second_low = gen_vec_interleave_lowv2di;
33667 op0 = gen_reg_rtx (second_imode);
33668 emit_insn (gen_interleave_second_low (op0, ops[0],
33671 /* Cast the SECOND_IMODE vector back to a vector on original
33673 emit_insn (gen_rtx_SET (VOIDmode, target,
33674 gen_lowpart (mode, op0)));
33678 gcc_unreachable ();
33682 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
33683 all values variable, and none identical. */
33686 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
33687 rtx target, rtx vals)
33689 rtx ops[32], op0, op1;
33690 enum machine_mode half_mode = VOIDmode;
33697 if (!mmx_ok && !TARGET_SSE)
33709 n = GET_MODE_NUNITS (mode);
33710 for (i = 0; i < n; i++)
33711 ops[i] = XVECEXP (vals, 0, i);
33712 ix86_expand_vector_init_concat (mode, target, ops, n);
33716 half_mode = V16QImode;
33720 half_mode = V8HImode;
33724 n = GET_MODE_NUNITS (mode);
33725 for (i = 0; i < n; i++)
33726 ops[i] = XVECEXP (vals, 0, i);
33727 op0 = gen_reg_rtx (half_mode);
33728 op1 = gen_reg_rtx (half_mode);
33729 ix86_expand_vector_init_interleave (half_mode, op0, ops,
33731 ix86_expand_vector_init_interleave (half_mode, op1,
33732 &ops [n >> 1], n >> 2);
33733 emit_insn (gen_rtx_SET (VOIDmode, target,
33734 gen_rtx_VEC_CONCAT (mode, op0, op1)));
33738 if (!TARGET_SSE4_1)
33746 /* Don't use ix86_expand_vector_init_interleave if we can't
33747 move from GPR to SSE register directly. */
33748 if (!TARGET_INTER_UNIT_MOVES)
33751 n = GET_MODE_NUNITS (mode);
33752 for (i = 0; i < n; i++)
33753 ops[i] = XVECEXP (vals, 0, i);
33754 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
33762 gcc_unreachable ();
33766 int i, j, n_elts, n_words, n_elt_per_word;
33767 enum machine_mode inner_mode;
33768 rtx words[4], shift;
33770 inner_mode = GET_MODE_INNER (mode);
33771 n_elts = GET_MODE_NUNITS (mode);
33772 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
33773 n_elt_per_word = n_elts / n_words;
33774 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
33776 for (i = 0; i < n_words; ++i)
33778 rtx word = NULL_RTX;
33780 for (j = 0; j < n_elt_per_word; ++j)
33782 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
33783 elt = convert_modes (word_mode, inner_mode, elt, true);
33789 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
33790 word, 1, OPTAB_LIB_WIDEN);
33791 word = expand_simple_binop (word_mode, IOR, word, elt,
33792 word, 1, OPTAB_LIB_WIDEN);
33800 emit_move_insn (target, gen_lowpart (mode, words[0]));
33801 else if (n_words == 2)
33803 rtx tmp = gen_reg_rtx (mode);
33804 emit_clobber (tmp);
33805 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
33806 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
33807 emit_move_insn (target, tmp);
33809 else if (n_words == 4)
33811 rtx tmp = gen_reg_rtx (V4SImode);
33812 gcc_assert (word_mode == SImode);
33813 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
33814 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
33815 emit_move_insn (target, gen_lowpart (mode, tmp));
33818 gcc_unreachable ();
33822 /* Initialize vector TARGET via VALS. Suppress the use of MMX
33823 instructions unless MMX_OK is true. */
33826 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
33828 enum machine_mode mode = GET_MODE (target);
33829 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33830 int n_elts = GET_MODE_NUNITS (mode);
33831 int n_var = 0, one_var = -1;
33832 bool all_same = true, all_const_zero = true;
33836 for (i = 0; i < n_elts; ++i)
33838 x = XVECEXP (vals, 0, i);
33839 if (!(CONST_INT_P (x)
33840 || GET_CODE (x) == CONST_DOUBLE
33841 || GET_CODE (x) == CONST_FIXED))
33842 n_var++, one_var = i;
33843 else if (x != CONST0_RTX (inner_mode))
33844 all_const_zero = false;
33845 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
33849 /* Constants are best loaded from the constant pool. */
33852 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
33856 /* If all values are identical, broadcast the value. */
33858 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
33859 XVECEXP (vals, 0, 0)))
33862 /* Values where only one field is non-constant are best loaded from
33863 the pool and overwritten via move later. */
33867 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
33868 XVECEXP (vals, 0, one_var),
33872 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
33876 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
33880 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
33882 enum machine_mode mode = GET_MODE (target);
33883 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33884 enum machine_mode half_mode;
33885 bool use_vec_merge = false;
33887 static rtx (*gen_extract[6][2]) (rtx, rtx)
33889 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
33890 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
33891 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
33892 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
33893 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
33894 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
33896 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
33898 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
33899 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
33900 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
33901 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
33902 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
33903 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
33913 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
33914 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
33916 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
33918 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
33919 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33925 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
33929 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
33930 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
33932 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
33934 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
33935 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33942 /* For the two element vectors, we implement a VEC_CONCAT with
33943 the extraction of the other element. */
33945 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
33946 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
33949 op0 = val, op1 = tmp;
33951 op0 = tmp, op1 = val;
33953 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
33954 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33959 use_vec_merge = TARGET_SSE4_1;
33966 use_vec_merge = true;
33970 /* tmp = target = A B C D */
33971 tmp = copy_to_reg (target);
33972 /* target = A A B B */
33973 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
33974 /* target = X A B B */
33975 ix86_expand_vector_set (false, target, val, 0);
33976 /* target = A X C D */
33977 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33978 const1_rtx, const0_rtx,
33979 GEN_INT (2+4), GEN_INT (3+4)));
33983 /* tmp = target = A B C D */
33984 tmp = copy_to_reg (target);
33985 /* tmp = X B C D */
33986 ix86_expand_vector_set (false, tmp, val, 0);
33987 /* target = A B X D */
33988 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33989 const0_rtx, const1_rtx,
33990 GEN_INT (0+4), GEN_INT (3+4)));
33994 /* tmp = target = A B C D */
33995 tmp = copy_to_reg (target);
33996 /* tmp = X B C D */
33997 ix86_expand_vector_set (false, tmp, val, 0);
33999 /* target = A B C X */
33999 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
34000 const0_rtx, const1_rtx,
34001 GEN_INT (2+4), GEN_INT (0+4)));
34005 gcc_unreachable ();
34010 use_vec_merge = TARGET_SSE4_1;
34014 /* Element 0 handled by vec_merge below. */
34017 use_vec_merge = true;
34023 /* With SSE2, use integer shuffles to swap element 0 and ELT,
34024 store into element 0, then shuffle them back. */
34028 order[0] = GEN_INT (elt);
34029 order[1] = const1_rtx;
34030 order[2] = const2_rtx;
34031 order[3] = GEN_INT (3);
34032 order[elt] = const0_rtx;
34034 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
34035 order[1], order[2], order[3]));
34037 ix86_expand_vector_set (false, target, val, 0);
34039 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
34040 order[1], order[2], order[3]));
34044 /* For SSE1, we have to reuse the V4SF code. */
34045 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
34046 gen_lowpart (SFmode, val), elt);
34051 use_vec_merge = TARGET_SSE2;
34054 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
34058 use_vec_merge = TARGET_SSE4_1;
34065 half_mode = V16QImode;
34071 half_mode = V8HImode;
34077 half_mode = V4SImode;
34083 half_mode = V2DImode;
34089 half_mode = V4SFmode;
34095 half_mode = V2DFmode;
34101 /* Compute offset. */
34105 gcc_assert (i <= 1);
34107 /* Extract the half. */
34108 tmp = gen_reg_rtx (half_mode);
34109 emit_insn (gen_extract[j][i] (tmp, target));
34111 /* Put val in tmp at elt. */
34112 ix86_expand_vector_set (false, tmp, val, elt);
34115 emit_insn (gen_insert[j][i] (target, target, tmp));
34124 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
34125 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
34126 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
34130 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
34132 emit_move_insn (mem, target);
34134 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
34135 emit_move_insn (tmp, val);
34137 emit_move_insn (target, mem);
34142 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
34144 enum machine_mode mode = GET_MODE (vec);
34145 enum machine_mode inner_mode = GET_MODE_INNER (mode);
34146 bool use_vec_extr = false;
34159 use_vec_extr = true;
34163 use_vec_extr = TARGET_SSE4_1;
34175 tmp = gen_reg_rtx (mode);
34176 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
34177 GEN_INT (elt), GEN_INT (elt),
34178 GEN_INT (elt+4), GEN_INT (elt+4)));
34182 tmp = gen_reg_rtx (mode);
34183 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
34187 gcc_unreachable ();
34190 use_vec_extr = true;
34195 use_vec_extr = TARGET_SSE4_1;
34209 tmp = gen_reg_rtx (mode);
34210 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
34211 GEN_INT (elt), GEN_INT (elt),
34212 GEN_INT (elt), GEN_INT (elt)));
34216 tmp = gen_reg_rtx (mode);
34217 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
34221 gcc_unreachable ();
34224 use_vec_extr = true;
34229 /* For SSE1, we have to reuse the V4SF code. */
34230 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
34231 gen_lowpart (V4SFmode, vec), elt);
34237 use_vec_extr = TARGET_SSE2;
34240 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
34244 use_vec_extr = TARGET_SSE4_1;
34250 tmp = gen_reg_rtx (V4SFmode);
34252 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
34254 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
34255 ix86_expand_vector_extract (false, target, tmp, elt & 3);
34263 tmp = gen_reg_rtx (V2DFmode);
34265 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
34267 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
34268 ix86_expand_vector_extract (false, target, tmp, elt & 1);
34276 tmp = gen_reg_rtx (V16QImode);
34278 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
34280 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
34281 ix86_expand_vector_extract (false, target, tmp, elt & 15);
34289 tmp = gen_reg_rtx (V8HImode);
34291 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
34293 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
34294 ix86_expand_vector_extract (false, target, tmp, elt & 7);
34302 tmp = gen_reg_rtx (V4SImode);
34304 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
34306 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
34307 ix86_expand_vector_extract (false, target, tmp, elt & 3);
34315 tmp = gen_reg_rtx (V2DImode);
34317 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
34319 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
34320 ix86_expand_vector_extract (false, target, tmp, elt & 1);
34326 /* ??? Could extract the appropriate HImode element and shift. */
34333 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
34334 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
34336 /* Let the rtl optimizers know about the zero extension performed. */
34337 if (inner_mode == QImode || inner_mode == HImode)
34339 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
34340 target = gen_lowpart (SImode, target);
34343 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
34347 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
34349 emit_move_insn (mem, vec);
34351 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
34352 emit_move_insn (target, tmp);
34356 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
34357 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
34358 The upper bits of DEST are undefined, though they shouldn't cause
34359 exceptions (some bits from src or all zeros are ok). */
34362 emit_reduc_half (rtx dest, rtx src, int i)
34365 switch (GET_MODE (src))
34369 tem = gen_sse_movhlps (dest, src, src);
34371 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
34372 GEN_INT (1 + 4), GEN_INT (1 + 4));
34375 tem = gen_vec_interleave_highv2df (dest, src, src);
34381 tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
34382 gen_lowpart (V1TImode, src),
34387 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
34389 tem = gen_avx_shufps256 (dest, src, src,
34390 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
34394 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
34396 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
34403 tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
34404 gen_lowpart (V4DImode, src),
34405 gen_lowpart (V4DImode, src),
34408 tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
34409 gen_lowpart (V2TImode, src),
34413 gcc_unreachable ();
34418 /* Expand a vector reduction. FN is the binary pattern to reduce;
34419 DEST is the destination; IN is the input vector. */
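/* E.g., reducing a V4SFmode vector takes two steps: combine the high
   and low 64-bit halves, then the two remaining elements; the final
   result ends up in the lowest element of DEST.  */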
34422 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
34424 rtx half, dst, vec = in;
34425 enum machine_mode mode = GET_MODE (in);
34428 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
34430 && mode == V8HImode
34431 && fn == gen_uminv8hi3)
34433 emit_insn (gen_sse4_1_phminposuw (dest, in));
34437 for (i = GET_MODE_BITSIZE (mode);
34438 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
34441 half = gen_reg_rtx (mode);
34442 emit_reduc_half (half, vec, i);
34443 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
34446 dst = gen_reg_rtx (mode);
34447 emit_insn (fn (dst, half, vec));
34452 /* Target hook for scalar_mode_supported_p. */
34454 ix86_scalar_mode_supported_p (enum machine_mode mode)
34456 if (DECIMAL_FLOAT_MODE_P (mode))
34457 return default_decimal_float_supported_p ();
34458 else if (mode == TFmode)
34461 return default_scalar_mode_supported_p (mode);
34464 /* Implements target hook vector_mode_supported_p. */
34466 ix86_vector_mode_supported_p (enum machine_mode mode)
34468 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
34470 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
34472 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
34474 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
34476 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
34481 /* Target hook for c_mode_for_suffix. */
34482 static enum machine_mode
34483 ix86_c_mode_for_suffix (char suffix)
34493 /* Worker function for TARGET_MD_ASM_CLOBBERS.
34495 We do this in the new i386 backend to maintain source compatibility
34496 with the old cc0-based compiler. */
34499 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
34500 tree inputs ATTRIBUTE_UNUSED,
34503 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
34505 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
34510 /* Implements target vector targetm.asm.encode_section_info. */
34512 static void ATTRIBUTE_UNUSED
34513 ix86_encode_section_info (tree decl, rtx rtl, int first)
34515 default_encode_section_info (decl, rtl, first);
34517 if (TREE_CODE (decl) == VAR_DECL
34518 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
34519 && ix86_in_large_data_p (decl))
34520 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
34523 /* Worker function for REVERSE_CONDITION. */
34526 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
34528 return (mode != CCFPmode && mode != CCFPUmode
34529 ? reverse_condition (code)
34530 : reverse_condition_maybe_unordered (code));
34533 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
34537 output_387_reg_move (rtx insn, rtx *operands)
34539 if (REG_P (operands[0]))
34541 if (REG_P (operands[1])
34542 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
34544 if (REGNO (operands[0]) == FIRST_STACK_REG)
34545 return output_387_ffreep (operands, 0);
34546 return "fstp\t%y0";
34548 if (STACK_TOP_P (operands[0]))
34549 return "fld%Z1\t%y1";
34552 else if (MEM_P (operands[0]))
34554 gcc_assert (REG_P (operands[1]));
34555 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
34556 return "fstp%Z0\t%y0";
34559 /* There is no non-popping store to memory for XFmode.
34560 So if we need one, follow the store with a load. */
34561 if (GET_MODE (operands[0]) == XFmode)
34562 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
34564 return "fst%Z0\t%y0";
34571 /* Output code to perform a conditional jump to LABEL, if the C2 flag in the
34572 FP status register is set. */
34575 ix86_emit_fp_unordered_jump (rtx label)
34577 rtx reg = gen_reg_rtx (HImode);
34580 emit_insn (gen_x86_fnstsw_1 (reg));
34582 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
34584 emit_insn (gen_x86_sahf_1 (reg));
34586 temp = gen_rtx_REG (CCmode, FLAGS_REG);
34587 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
34591 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
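/* Bit 0x04 of the high status-word byte stored by fnstsw is C2.  */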
34593 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
34594 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
34597 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
34598 gen_rtx_LABEL_REF (VOIDmode, label),
34600 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
34602 emit_jump_insn (temp);
34603 predict_jump (REG_BR_PROB_BASE * 10 / 100);
34606 /* Output code to perform a log1p XFmode calculation. */
34608 void ix86_emit_i387_log1p (rtx op0, rtx op1)
34610 rtx label1 = gen_label_rtx ();
34611 rtx label2 = gen_label_rtx ();
34613 rtx tmp = gen_reg_rtx (XFmode);
34614 rtx tmp2 = gen_reg_rtx (XFmode);
34617 emit_insn (gen_absxf2 (tmp, op1));
34618 test = gen_rtx_GE (VOIDmode, tmp,
34619 CONST_DOUBLE_FROM_REAL_VALUE (
34620 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
34622 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
34624 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
34625 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
34626 emit_jump (label2);
34628 emit_label (label1);
34629 emit_move_insn (tmp, CONST1_RTX (XFmode));
34630 emit_insn (gen_addxf3 (tmp, op1, tmp));
34631 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
34632 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
34634 emit_label (label2);
34637 /* Emit x87 code for the round calculation: OP0 = round (OP1). */
34638 void ix86_emit_i387_round (rtx op0, rtx op1)
34640 enum machine_mode inmode = GET_MODE (op1);
34641 enum machine_mode outmode = GET_MODE (op0);
34642 rtx e1, e2, res, tmp, tmp1, half;
34643 rtx scratch = gen_reg_rtx (HImode);
34644 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
34645 rtx jump_label = gen_label_rtx ();
34647 rtx (*gen_abs) (rtx, rtx);
34648 rtx (*gen_neg) (rtx, rtx);
34653 gen_abs = gen_abssf2;
34656 gen_abs = gen_absdf2;
34659 gen_abs = gen_absxf2;
34662 gcc_unreachable ();
34668 gen_neg = gen_negsf2;
34671 gen_neg = gen_negdf2;
34674 gen_neg = gen_negxf2;
34677 gen_neg = gen_neghi2;
34680 gen_neg = gen_negsi2;
34683 gen_neg = gen_negdi2;
34686 gcc_unreachable ();
34689 e1 = gen_reg_rtx (inmode);
34690 e2 = gen_reg_rtx (inmode);
34691 res = gen_reg_rtx (outmode);
34693 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
34695 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
34697 /* scratch = fxam(op1) */
34698 emit_insn (gen_rtx_SET (VOIDmode, scratch,
34699 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
34701 /* e1 = fabs(op1) */
34702 emit_insn (gen_abs (e1, op1));
34704 /* e2 = e1 + 0.5 */
34705 half = force_reg (inmode, half);
34706 emit_insn (gen_rtx_SET (VOIDmode, e2,
34707 gen_rtx_PLUS (inmode, e1, half)));
34709 /* res = floor(e2) */
34710 if (inmode != XFmode)
34712 tmp1 = gen_reg_rtx (XFmode);
34714 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
34715 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
34725 rtx tmp0 = gen_reg_rtx (XFmode);
34727 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
34729 emit_insn (gen_rtx_SET (VOIDmode, res,
34730 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
34731 UNSPEC_TRUNC_NOOP)));
34735 emit_insn (gen_frndintxf2_floor (res, tmp1));
34738 emit_insn (gen_lfloorxfhi2 (res, tmp1));
34741 emit_insn (gen_lfloorxfsi2 (res, tmp1));
34744 emit_insn (gen_lfloorxfdi2 (res, tmp1));
34747 gcc_unreachable ();
34750 /* flags = signbit(a) */
34751 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
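/* Bit 0x02 of the high status-word byte is C1, which fxam sets to
   the sign of the operand.  */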
34753 /* if (flags) then res = -res */
34754 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
34755 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
34756 gen_rtx_LABEL_REF (VOIDmode, jump_label),
34758 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
34759 predict_jump (REG_BR_PROB_BASE * 50 / 100);
34760 JUMP_LABEL (insn) = jump_label;
34762 emit_insn (gen_neg (res, res));
34764 emit_label (jump_label);
34765 LABEL_NUSES (jump_label) = 1;
34767 emit_move_insn (op0, res);
34770 /* Output code to perform a Newton-Raphson approximation of a single precision
34771 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
34773 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
34775 rtx x0, x1, e0, e1;
34777 x0 = gen_reg_rtx (mode);
34778 e0 = gen_reg_rtx (mode);
34779 e1 = gen_reg_rtx (mode);
34780 x1 = gen_reg_rtx (mode);
34782 /* a / b = a * ((rcp (b) + rcp (b)) - (b * rcp (b) * rcp (b))) */
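/* This is one Newton-Raphson step for 1/b: with the estimate
   x0 = rcp (b), x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0),
   roughly doubling the ~12-bit accuracy of the hardware estimate.  */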
34784 b = force_reg (mode, b);
34786 /* x0 = rcp(b) estimate */
34787 emit_insn (gen_rtx_SET (VOIDmode, x0,
34788 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
34791 emit_insn (gen_rtx_SET (VOIDmode, e0,
34792 gen_rtx_MULT (mode, x0, b)));
34795 emit_insn (gen_rtx_SET (VOIDmode, e0,
34796 gen_rtx_MULT (mode, x0, e0)));
34799 emit_insn (gen_rtx_SET (VOIDmode, e1,
34800 gen_rtx_PLUS (mode, x0, x0)));
34803 emit_insn (gen_rtx_SET (VOIDmode, x1,
34804 gen_rtx_MINUS (mode, e1, e0)));
34807 emit_insn (gen_rtx_SET (VOIDmode, res,
34808 gen_rtx_MULT (mode, a, x1)));
34811 /* Output code to perform a Newton-Raphson approximation of a
34812 single precision floating point [reciprocal] square root. */
34814 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
34817 rtx x0, e0, e1, e2, e3, mthree, mhalf;
34820 x0 = gen_reg_rtx (mode);
34821 e0 = gen_reg_rtx (mode);
34822 e1 = gen_reg_rtx (mode);
34823 e2 = gen_reg_rtx (mode);
34824 e3 = gen_reg_rtx (mode);
34826 real_from_integer (&r, VOIDmode, -3, -1, 0);
34827 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
34829 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
34830 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
34832 if (VECTOR_MODE_P (mode))
34834 mthree = ix86_build_const_vector (mode, true, mthree);
34835 mhalf = ix86_build_const_vector (mode, true, mhalf);
34838 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
34839 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
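/* Both forms are one Newton-Raphson step for 1/sqrt(a): with the
   estimate x0 = rsqrtss(a),
   x1 = x0 * (3 - a * x0 * x0) / 2 = -0.5 * x0 * (a * x0 * x0 - 3).  */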
34841 a = force_reg (mode, a);
34843 /* x0 = rsqrt(a) estimate */
34844 emit_insn (gen_rtx_SET (VOIDmode, x0,
34845 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
34848 /* If a == 0.0, mask out the infinite rsqrt estimate so sqrt (0.0) does not produce a NaN (0 * inf). */
34853 zero = gen_reg_rtx (mode);
34854 mask = gen_reg_rtx (mode);
34856 zero = force_reg (mode, CONST0_RTX(mode));
34857 emit_insn (gen_rtx_SET (VOIDmode, mask,
34858 gen_rtx_NE (mode, zero, a)));
34860 emit_insn (gen_rtx_SET (VOIDmode, x0,
34861 gen_rtx_AND (mode, x0, mask)));
34865 emit_insn (gen_rtx_SET (VOIDmode, e0,
34866 gen_rtx_MULT (mode, x0, a)));
34868 emit_insn (gen_rtx_SET (VOIDmode, e1,
34869 gen_rtx_MULT (mode, e0, x0)));
34872 mthree = force_reg (mode, mthree);
34873 emit_insn (gen_rtx_SET (VOIDmode, e2,
34874 gen_rtx_PLUS (mode, e1, mthree)));
34876 mhalf = force_reg (mode, mhalf);
34878 /* e3 = -.5 * x0 */
34879 emit_insn (gen_rtx_SET (VOIDmode, e3,
34880 gen_rtx_MULT (mode, x0, mhalf)));
34882 /* e3 = -.5 * e0 */
34883 emit_insn (gen_rtx_SET (VOIDmode, e3,
34884 gen_rtx_MULT (mode, e0, mhalf)));
34885 /* ret = e2 * e3 */
34886 emit_insn (gen_rtx_SET (VOIDmode, res,
34887 gen_rtx_MULT (mode, e2, e3)));
34890 #ifdef TARGET_SOLARIS
34891 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
34894 i386_solaris_elf_named_section (const char *name, unsigned int flags,
34897 /* With Binutils 2.15, the "@unwind" marker must be specified on
34898 every occurrence of the ".eh_frame" section, not just the first
34901 && strcmp (name, ".eh_frame") == 0)
34903 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
34904 flags & SECTION_WRITE ? "aw" : "a");
34909 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
34911 solaris_elf_asm_comdat_section (name, flags, decl);
34916 default_elf_asm_named_section (name, flags, decl);
34918 #endif /* TARGET_SOLARIS */
34920 /* Return the mangling of TYPE if it is an extended fundamental type. */
34922 static const char *
34923 ix86_mangle_type (const_tree type)
34925 type = TYPE_MAIN_VARIANT (type);
34927 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
34928 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
34931 switch (TYPE_MODE (type))
34934 /* __float128 is "g". */
34937 /* "long double" or __float80 is "e". */
34944 /* For 32-bit code we can save PIC register setup by using
34945 __stack_chk_fail_local hidden function instead of calling
34946 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
34947 register, so it is better to call __stack_chk_fail directly. */
34949 static tree ATTRIBUTE_UNUSED
34950 ix86_stack_protect_fail (void)
34952 return TARGET_64BIT
34953 ? default_external_stack_protect_fail ()
34954 : default_hidden_stack_protect_fail ();
34957 /* Select a format to encode pointers in exception handling data. CODE
34958 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
34959 true if the symbol may be affected by dynamic relocations.
34961 ??? All x86 object file formats are capable of representing this.
34962 After all, the relocation needed is the same as for the call insn.
34963 Whether or not a particular assembler allows us to enter such, I
34964 guess we'll have to see. */
34966 asm_preferred_eh_data_format (int code, int global)
34970 int type = DW_EH_PE_sdata8;
34972 || ix86_cmodel == CM_SMALL_PIC
34973 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
34974 type = DW_EH_PE_sdata4;
34975 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
34977 if (ix86_cmodel == CM_SMALL
34978 || (ix86_cmodel == CM_MEDIUM && code))
34979 return DW_EH_PE_udata4;
34980 return DW_EH_PE_absptr;
34983 /* Expand copysign from SIGN to the positive value ABS_VALUE
34984 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
34987 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
34989 enum machine_mode mode = GET_MODE (sign);
34990 rtx sgn = gen_reg_rtx (mode);
34991 if (mask == NULL_RTX)
34993 enum machine_mode vmode;
34995 if (mode == SFmode)
34997 else if (mode == DFmode)
35002 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
35003 if (!VECTOR_MODE_P (mode))
35005 /* We need to generate a scalar mode mask in this case. */
35006 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
35007 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
35008 mask = gen_reg_rtx (mode);
35009 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
35013 mask = gen_rtx_NOT (mode, mask);
35014 emit_insn (gen_rtx_SET (VOIDmode, sgn,
35015 gen_rtx_AND (mode, mask, sign)));
35016 emit_insn (gen_rtx_SET (VOIDmode, result,
35017 gen_rtx_IOR (mode, abs_value, sgn)));
35020 /* Expand fabs (OP0) and return a new rtx that holds the result. The
35021 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
35024 ix86_expand_sse_fabs (rtx op0, rtx *smask)
35026 enum machine_mode vmode, mode = GET_MODE (op0);
35029 xa = gen_reg_rtx (mode);
35030 if (mode == SFmode)
35032 else if (mode == DFmode)
35036 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
35037 if (!VECTOR_MODE_P (mode))
35039 /* We need to generate a scalar mode mask in this case. */
35040 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
35041 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
35042 mask = gen_reg_rtx (mode);
35043 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
35045 emit_insn (gen_rtx_SET (VOIDmode, xa,
35046 gen_rtx_AND (mode, op0, mask)));
35054 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
35055 swapping the operands if SWAP_OPERANDS is true. The expanded
35056 code is a forward jump to a newly created label in case the
35057 comparison is true. The generated label rtx is returned. */
35059 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
35060 bool swap_operands)
35071 label = gen_label_rtx ();
35072 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
35073 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35074 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
35075 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
35076 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
35077 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
35078 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
35079 JUMP_LABEL (tmp) = label;
35084 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
35085 using comparison code CODE. Operands are swapped for the comparison if
35086 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
35088 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
35089 bool swap_operands)
35091 rtx (*insn)(rtx, rtx, rtx, rtx);
35092 enum machine_mode mode = GET_MODE (op0);
35093 rtx mask = gen_reg_rtx (mode);
35102 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
35104 emit_insn (insn (mask, op0, op1,
35105 gen_rtx_fmt_ee (code, mode, op0, op1)));
35109 /* Generate and return a rtx of mode MODE for 2**n where n is the number
35110 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
35112 ix86_gen_TWO52 (enum machine_mode mode)
35114 REAL_VALUE_TYPE TWO52r;
35117 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
35118 TWO52 = const_double_from_real_value (TWO52r, mode);
35119 TWO52 = force_reg (mode, TWO52);
35124 /* Expand SSE sequence for computing lround from OP1 storing
35127 ix86_expand_lround (rtx op0, rtx op1)
35129 /* C code for the stuff we're doing below:
35130 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
35133 enum machine_mode mode = GET_MODE (op1);
35134 const struct real_format *fmt;
35135 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35138 /* load nextafter (0.5, 0.0) */
35139 fmt = REAL_MODE_FORMAT (mode);
35140 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35141 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
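/* pred_half is 0.5 - 2**(-p-1), the largest value strictly below 0.5.
   Using it instead of 0.5 keeps inputs just below 0.5, whose sum with
   0.5 would round up to 1.0, from being rounded away from zero.  */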
35143 /* adj = copysign (0.5, op1) */
35144 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
35145 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
35147 /* adj = op1 + adj */
35148 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
35150 /* op0 = (imode)adj */
35151 expand_fix (op0, adj, 0);
35154 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
35157 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
35159 /* C code for the stuff we're doing below (for do_floor):
35161 xi -= (double)xi > op1 ? 1 : 0;
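       e.g., floor (-2.5): xi = -2 (the fix truncates toward zero),
       (double) xi = -2.0 > -2.5, so xi is adjusted to -3.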
35164 enum machine_mode fmode = GET_MODE (op1);
35165 enum machine_mode imode = GET_MODE (op0);
35166 rtx ireg, freg, label, tmp;
35168 /* reg = (long)op1 */
35169 ireg = gen_reg_rtx (imode);
35170 expand_fix (ireg, op1, 0);
35172 /* freg = (double)reg */
35173 freg = gen_reg_rtx (fmode);
35174 expand_float (freg, ireg, 0);
35176 /* ireg = (freg > op1) ? ireg - 1 : ireg */
35177 label = ix86_expand_sse_compare_and_jump (UNLE,
35178 freg, op1, !do_floor);
35179 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
35180 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
35181 emit_move_insn (ireg, tmp);
35183 emit_label (label);
35184 LABEL_NUSES (label) = 1;
35186 emit_move_insn (op0, ireg);
35189 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
35190 result in OPERAND0. */
35192 ix86_expand_rint (rtx operand0, rtx operand1)
35194 /* C code for the stuff we're doing below:
35195 xa = fabs (operand1);
35196 if (!isless (xa, 2**52))
35198 xa = xa + 2**52 - 2**52;
35199 return copysign (xa, operand1);
35201 enum machine_mode mode = GET_MODE (operand0);
35202 rtx res, xa, label, TWO52, mask;
35204 res = gen_reg_rtx (mode);
35205 emit_move_insn (res, operand1);
35207 /* xa = abs (operand1) */
35208 xa = ix86_expand_sse_fabs (res, &mask);
35210 /* if (!isless (xa, TWO52)) goto label; */
35211 TWO52 = ix86_gen_TWO52 (mode);
35212 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35214 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35215 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
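/* At this point |xa| < 2**52 (resp. 2**23 for SFmode), so xa + TWO52
   lands in the binade where the unit in the last place is 1.0; the
   addition itself therefore rounds xa to an integer in the current
   rounding mode, and the subtraction restores the magnitude.  */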
35217 ix86_sse_copysign_to_positive (res, xa, res, mask);
35219 emit_label (label);
35220 LABEL_NUSES (label) = 1;
35222 emit_move_insn (operand0, res);
35225 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
35228 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
35230 /* C code for the stuff we expand below.
35231 double xa = fabs (x), x2;
35232 if (!isless (xa, TWO52))
35234 xa = xa + TWO52 - TWO52;
35235 x2 = copysign (xa, x);
35244 enum machine_mode mode = GET_MODE (operand0);
35245 rtx xa, TWO52, tmp, label, one, res, mask;
35247 TWO52 = ix86_gen_TWO52 (mode);
35249 /* Temporary for holding the result, initialized to the input
35250 operand to ease control flow. */
35251 res = gen_reg_rtx (mode);
35252 emit_move_insn (res, operand1);
35254 /* xa = abs (operand1) */
35255 xa = ix86_expand_sse_fabs (res, &mask);
35257 /* if (!isless (xa, TWO52)) goto label; */
35258 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35260 /* xa = xa + TWO52 - TWO52; */
35261 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35262 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
35264 /* xa = copysign (xa, operand1) */
35265 ix86_sse_copysign_to_positive (xa, xa, res, mask);
35267 /* generate 1.0 or -1.0 */
35268 one = force_reg (mode,
35269 const_double_from_real_value (do_floor
35270 ? dconst1 : dconstm1, mode));
35272 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
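/* E.g., floor (-2.5): xa = 2.5 rounds (to nearest even) to 2.0 and
   copysign gives -2.0; since -2.0 > -2.5, one is subtracted,
   yielding -3.0.  */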
35273 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
35274 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35275 gen_rtx_AND (mode, one, tmp)));
35276 /* We always need to subtract here to preserve signed zero. */
35277 tmp = expand_simple_binop (mode, MINUS,
35278 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35279 emit_move_insn (res, tmp);
35281 emit_label (label);
35282 LABEL_NUSES (label) = 1;
35284 emit_move_insn (operand0, res);
35287 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
35290 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
35292 /* C code for the stuff we expand below.
35293 double xa = fabs (x), x2;
35294 if (!isless (xa, TWO52))
35296 x2 = (double)(long)x;
35303 if (HONOR_SIGNED_ZEROS (mode))
35304 return copysign (x2, x);
35307 enum machine_mode mode = GET_MODE (operand0);
35308 rtx xa, xi, TWO52, tmp, label, one, res, mask;
35310 TWO52 = ix86_gen_TWO52 (mode);
35312 /* Temporary for holding the result, initialized to the input
35313 operand to ease control flow. */
35314 res = gen_reg_rtx (mode);
35315 emit_move_insn (res, operand1);
35317 /* xa = abs (operand1) */
35318 xa = ix86_expand_sse_fabs (res, &mask);
35320 /* if (!isless (xa, TWO52)) goto label; */
35321 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35323 /* xa = (double)(long)x */
35324 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35325 expand_fix (xi, res, 0);
35326 expand_float (xa, xi, 0);
35329 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
35331 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35332 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
35333 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35334 gen_rtx_AND (mode, one, tmp)));
35335 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
35336 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35337 emit_move_insn (res, tmp);
35339 if (HONOR_SIGNED_ZEROS (mode))
35340 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
35342 emit_label (label);
35343 LABEL_NUSES (label) = 1;
35345 emit_move_insn (operand0, res);
35348 /* Expand SSE sequence for computing round from OPERAND1 storing
35349 into OPERAND0. The sequence works without relying on DImode truncation
35350 via cvttsd2siq, which is only available on 64-bit targets. */
35352 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
35354 /* C code for the stuff we expand below.
35355 double xa = fabs (x), xa2, x2;
35356 if (!isless (xa, TWO52))
35358 Using the absolute value and copying back sign makes
35359 -0.0 -> -0.0 correct.
35360 xa2 = xa + TWO52 - TWO52;
35365 else if (dxa > 0.5)
35367 x2 = copysign (xa2, x);
35370 enum machine_mode mode = GET_MODE (operand0);
35371 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
35373 TWO52 = ix86_gen_TWO52 (mode);
35375 /* Temporary for holding the result, initialized to the input
35376 operand to ease control flow. */
35377 res = gen_reg_rtx (mode);
35378 emit_move_insn (res, operand1);
35380 /* xa = abs (operand1) */
35381 xa = ix86_expand_sse_fabs (res, &mask);
35383 /* if (!isless (xa, TWO52)) goto label; */
35384 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35386 /* xa2 = xa + TWO52 - TWO52; */
35387 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35388 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
35390 /* dxa = xa2 - xa; */
35391 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
35393 /* generate 0.5, 1.0 and -0.5 */
35394 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
35395 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
35396 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
35400 tmp = gen_reg_rtx (mode);
35401 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
35402 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
35403 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35404 gen_rtx_AND (mode, one, tmp)));
35405 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35406 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
35407 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
35408 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35409 gen_rtx_AND (mode, one, tmp)));
35410 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
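/* E.g., round (2.5): xa2 = 2.0 (2.5 rounds to nearest even), so
   dxa = -0.5 and the second test fires, giving xa2 = 3.0; halfway
   cases are thus rounded away from zero.  */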
35412 /* res = copysign (xa2, operand1) */
35413 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
35415 emit_label (label);
35416 LABEL_NUSES (label) = 1;
35418 emit_move_insn (operand0, res);
35421 /* Expand SSE sequence for computing trunc from OPERAND1 storing
35424 ix86_expand_trunc (rtx operand0, rtx operand1)
35426 /* C code for SSE variant we expand below.
35427 double xa = fabs (x), x2;
35428 if (!isless (xa, TWO52))
35430 x2 = (double)(long)x;
35431 if (HONOR_SIGNED_ZEROS (mode))
35432 return copysign (x2, x);
35435 enum machine_mode mode = GET_MODE (operand0);
35436 rtx xa, xi, TWO52, label, res, mask;
35438 TWO52 = ix86_gen_TWO52 (mode);
35440 /* Temporary for holding the result, initialized to the input
35441 operand to ease control flow. */
35442 res = gen_reg_rtx (mode);
35443 emit_move_insn (res, operand1);
35445 /* xa = abs (operand1) */
35446 xa = ix86_expand_sse_fabs (res, &mask);
35448 /* if (!isless (xa, TWO52)) goto label; */
35449 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35451 /* x = (double)(long)x */
35452 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35453 expand_fix (xi, res, 0);
35454 expand_float (res, xi, 0);
35456 if (HONOR_SIGNED_ZEROS (mode))
35457 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
35459 emit_label (label);
35460 LABEL_NUSES (label) = 1;
35462 emit_move_insn (operand0, res);
35465 /* Expand SSE sequence for computing trunc from OPERAND1 storing
35468 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
35470 enum machine_mode mode = GET_MODE (operand0);
35471 rtx xa, mask, TWO52, label, one, res, smask, tmp;
35473 /* C code for SSE variant we expand below.
35474 double xa = fabs (x), x2;
35475 if (!isless (xa, TWO52))
35477 xa2 = xa + TWO52 - TWO52;
35481 x2 = copysign (xa2, x);
35485 TWO52 = ix86_gen_TWO52 (mode);
35487 /* Temporary for holding the result, initialized to the input
35488 operand to ease control flow. */
35489 res = gen_reg_rtx (mode);
35490 emit_move_insn (res, operand1);
35492 /* xa = abs (operand1) */
35493 xa = ix86_expand_sse_fabs (res, &smask);
35495 /* if (!isless (xa, TWO52)) goto label; */
35496 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35498 /* res = xa + TWO52 - TWO52; */
35499 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35500 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
35501 emit_move_insn (res, tmp);
35504 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
35506 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
35507 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
35508 emit_insn (gen_rtx_SET (VOIDmode, mask,
35509 gen_rtx_AND (mode, mask, one)));
35510 tmp = expand_simple_binop (mode, MINUS,
35511 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
35512 emit_move_insn (res, tmp);
35514 /* res = copysign (res, operand1) */
35515 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
35517 emit_label (label);
35518 LABEL_NUSES (label) = 1;
35520 emit_move_insn (operand0, res);
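/* A standalone scalar sketch of the 32-bit variant above
   (illustrative only; assumes IEEE double).  It truncates without
   any integer conversion, which is why it suits targets without a
   64-bit fix/float path.  */
#if 0
#include <math.h>

static double
trunc32_model (double x)
{
  const double two52 = 0x1p52;
  double xa = fabs (x), x2;

  if (!(xa < two52))
    return x;

  x2 = xa + two52 - two52;      /* Round xa to the nearest integer.  */
  if (x2 > xa)                  /* If that rounded up, step back down */
    x2 -= 1.0;                  /* so the result truncates toward 0.  */

  return copysign (x2, x);
}
#endif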
35523 /* Expand SSE sequence for computing round from OPERAND1 storing
35526 ix86_expand_round (rtx operand0, rtx operand1)
35528 /* C code for the stuff we're doing below:
35529 double xa = fabs (x);
35530 if (!isless (xa, TWO52))
35532 xa = (double)(long)(xa + nextafter (0.5, 0.0));
35533 return copysign (xa, x);
35535 enum machine_mode mode = GET_MODE (operand0);
35536 rtx res, TWO52, xa, label, xi, half, mask;
35537 const struct real_format *fmt;
35538 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35540 /* Temporary for holding the result, initialized to the input
35541 operand to ease control flow. */
35542 res = gen_reg_rtx (mode);
35543 emit_move_insn (res, operand1);
35545 TWO52 = ix86_gen_TWO52 (mode);
35546 xa = ix86_expand_sse_fabs (res, &mask);
35547 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35549 /* load nextafter (0.5, 0.0) */
35550 fmt = REAL_MODE_FORMAT (mode);
35551 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35552 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35554 /* xa = xa + 0.5 */
35555 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
35556 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
35558 /* xa = (double)(int64_t)xa */
35559 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35560 expand_fix (xi, xa, 0);
35561 expand_float (xa, xi, 0);
35563 /* res = copysign (xa, operand1) */
35564 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
35566 emit_label (label);
35567 LABEL_NUSES (label) = 1;
35569 emit_move_insn (operand0, res);
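/* A standalone scalar sketch of the expansion above (illustrative
   only; assumes IEEE double, p == 53).  Why nextafter (0.5, 0.0)
   rather than 0.5: for the largest double below 0.5, x + 0.5 rounds
   up to 1.0, so biasing by exactly 0.5 would return 1 where round
   must return 0.  The predecessor of 0.5 keeps such sums below 1.  */
#if 0
#include <math.h>

static double
round_bias_model (double x)
{
  const double pred_half = 0.5 - 0x1p-54;   /* nextafter (0.5, 0.0) */
  double xa = fabs (x);

  if (!(xa < 0x1p52))
    return x;                               /* Already integral.  */

  xa = (double) (long) (xa + pred_half);    /* Bias, then truncate.  */
  return copysign (xa, x);
}
#endif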
35572 /* Expand SSE sequence for computing round
35573 from OP1 storing into OP0 using sse4 round insn. */
35575 ix86_expand_round_sse4 (rtx op0, rtx op1)
35577 enum machine_mode mode = GET_MODE (op0);
35578 rtx e1, e2, res, half;
35579 const struct real_format *fmt;
35580 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35581 rtx (*gen_copysign) (rtx, rtx, rtx);
35582 rtx (*gen_round) (rtx, rtx, rtx);
35587 gen_copysign = gen_copysignsf3;
35588 gen_round = gen_sse4_1_roundsf2;
35591 gen_copysign = gen_copysigndf3;
35592 gen_round = gen_sse4_1_rounddf2;
35595 gcc_unreachable ();
35598 /* round (a) = trunc (a + copysign (0.5, a)) */
35600 /* load nextafter (0.5, 0.0) */
35601 fmt = REAL_MODE_FORMAT (mode);
35602 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35603 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35604 half = const_double_from_real_value (pred_half, mode);
35606 /* e1 = copysign (0.5, op1) */
35607 e1 = gen_reg_rtx (mode);
35608 emit_insn (gen_copysign (e1, half, op1));
35610 /* e2 = op1 + e1 */
35611 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
35613 /* res = trunc (e2) */
35614 res = gen_reg_rtx (mode);
35615 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
35617 emit_move_insn (op0, res);
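/* Scalar equivalent of the SSE4.1 sequence above (illustrative
   only): biasing by copysign (PRED_HALF, a) handles both signs at
   once, and the round insn with ROUND_TRUNC supplies the trunc.  */
#if 0
#include <math.h>

static double
round_sse4_model (double a)
{
  const double pred_half = 0.5 - 0x1p-54;   /* nextafter (0.5, 0.0) */
  return trunc (a + copysign (pred_half, a));
}
#endif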
35621 /* Table of valid machine attributes. */
35622 static const struct attribute_spec ix86_attribute_table[] =
35624 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
35625 affects_type_identity } */
35626 /* Stdcall attribute says callee is responsible for popping arguments
35627 if they are not variable. */
35628 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35630 /* Fastcall attribute says callee is responsible for popping arguments
35631 if they are not variable. */
35632 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35634 /* Thiscall attribute says callee is responsible for popping arguments
35635 if they are not variable. */
35636 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
/* Cdecl attribute says the callee is a normal C declaration. */
35639 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35641 /* Regparm attribute specifies how many integer arguments are to be
35642 passed in registers. */
35643 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
35645 /* Sseregparm attribute says we are using x86_64 calling conventions
35646 for FP arguments. */
35647 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35649 /* The transactional memory builtins are implicitly regparm or fastcall
35650 depending on the ABI. Override the generic do-nothing attribute that
35651 these builtins were declared with. */
35652 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
35654 /* force_align_arg_pointer says this function realigns the stack at entry. */
35655 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
35656 false, true, true, ix86_handle_cconv_attribute, false },
35657 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35658 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
35659 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
35660 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
35663 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
35665 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
35667 #ifdef SUBTARGET_ATTRIBUTE_TABLE
35668 SUBTARGET_ATTRIBUTE_TABLE,
35670 /* ms_abi and sysv_abi calling convention function attributes. */
35671 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
35672 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
35673 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
35675 { "callee_pop_aggregate_return", 1, 1, false, true, true,
35676 ix86_handle_callee_pop_aggregate_return, true },
35678 { NULL, 0, 0, false, false, false, NULL, false }
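/* Example uses of the attributes registered above, as they would
   appear in user code (illustrative only; the identifiers are made
   up).  */
#if 0
/* Callee pops its own fixed arguments.  */
int __attribute__ ((stdcall)) win_callback (int a, int b);

/* First two integer arguments arrive in ECX and EDX.  */
int __attribute__ ((fastcall)) fast_hash (const char *s, int len);

/* Up to three integer arguments passed in registers.  */
int __attribute__ ((regparm (3))) add3 (int a, int b, int c);

/* Use the Microsoft x86-64 calling convention for this function.  */
void __attribute__ ((ms_abi)) win64_entry (void *ctx);
#endif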
35681 /* Implement targetm.vectorize.builtin_vectorization_cost. */
35683 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
35684 tree vectype ATTRIBUTE_UNUSED,
35685 int misalign ATTRIBUTE_UNUSED)
35687 switch (type_of_cost)
35690 return ix86_cost->scalar_stmt_cost;
35693 return ix86_cost->scalar_load_cost;
35696 return ix86_cost->scalar_store_cost;
35699 return ix86_cost->vec_stmt_cost;
35702 return ix86_cost->vec_align_load_cost;
35705 return ix86_cost->vec_store_cost;
35707 case vec_to_scalar:
35708 return ix86_cost->vec_to_scalar_cost;
35710 case scalar_to_vec:
35711 return ix86_cost->scalar_to_vec_cost;
35713 case unaligned_load:
35714 case unaligned_store:
35715 return ix86_cost->vec_unalign_load_cost;
35717 case cond_branch_taken:
35718 return ix86_cost->cond_taken_branch_cost;
35720 case cond_branch_not_taken:
35721 return ix86_cost->cond_not_taken_branch_cost;
35724 case vec_promote_demote:
35725 return ix86_cost->vec_stmt_cost;
35728 gcc_unreachable ();
35732 /* Construct (set target (vec_select op0 (parallel perm))) and
35733 return true if that's a valid instruction in the active ISA. */
35736 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
35738 rtx rperm[MAX_VECT_LEN], x;
35741 for (i = 0; i < nelt; ++i)
35742 rperm[i] = GEN_INT (perm[i]);
35744 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
35745 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
35746 x = gen_rtx_SET (VOIDmode, target, x);
35749 if (recog_memoized (x) < 0)
35757 /* Similar, but generate a vec_concat from op0 and op1 as well. */
35760 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
35761 const unsigned char *perm, unsigned nelt)
35763 enum machine_mode v2mode;
35766 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
35767 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
35768 return expand_vselect (target, x, perm, nelt);
35771 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35772 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
35775 expand_vec_perm_blend (struct expand_vec_perm_d *d)
35777 enum machine_mode vmode = d->vmode;
35778 unsigned i, mask, nelt = d->nelt;
35779 rtx target, op0, op1, x;
35780 rtx rperm[32], vperm;
35782 if (d->op0 == d->op1)
35784 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
35786 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
35788 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
35793 /* This is a blend, not a permute. Elements must stay in their
35794 respective lanes. */
35795 for (i = 0; i < nelt; ++i)
35797 unsigned e = d->perm[i];
35798 if (!(e == i || e == i + nelt))
35805 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
35806 decision should be extracted elsewhere, so that we only try that
35807 sequence once all budget==3 options have been tried. */
35808 target = d->target;
35821 for (i = 0; i < nelt; ++i)
35822 mask |= (d->perm[i] >= nelt) << i;
35826 for (i = 0; i < 2; ++i)
35827 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
35832 for (i = 0; i < 4; ++i)
35833 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
35838 /* See if bytes move in pairs so we can use pblendw with
an immediate argument, rather than pblendvb with a vector
argument.  */
35841 for (i = 0; i < 16; i += 2)
35842 if (d->perm[i] + 1 != d->perm[i + 1])
35845 for (i = 0; i < nelt; ++i)
35846 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
35849 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
35850 vperm = force_reg (vmode, vperm);
35852 if (GET_MODE_SIZE (vmode) == 16)
35853 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
35855 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
35859 for (i = 0; i < 8; ++i)
35860 mask |= (d->perm[i * 2] >= 16) << i;
35865 target = gen_lowpart (vmode, target);
35866 op0 = gen_lowpart (vmode, op0);
35867 op1 = gen_lowpart (vmode, op1);
35871 /* See if bytes move in pairs. If not, vpblendvb must be used. */
35872 for (i = 0; i < 32; i += 2)
35873 if (d->perm[i] + 1 != d->perm[i + 1])
35875 /* See if bytes move in quadruplets. If yes, vpblendd
35876 with immediate can be used. */
35877 for (i = 0; i < 32; i += 4)
35878 if (d->perm[i] + 2 != d->perm[i + 2])
35882 /* See if bytes move the same in both lanes. If yes,
35883 vpblendw with immediate can be used. */
35884 for (i = 0; i < 16; i += 2)
35885 if (d->perm[i] + 16 != d->perm[i + 16])
35888 /* Use vpblendw. */
35889 for (i = 0; i < 16; ++i)
35890 mask |= (d->perm[i * 2] >= 32) << i;
35895 /* Use vpblendd. */
35896 for (i = 0; i < 8; ++i)
35897 mask |= (d->perm[i * 4] >= 32) << i;
35902 /* See if words move in pairs. If yes, vpblendd can be used. */
35903 for (i = 0; i < 16; i += 2)
35904 if (d->perm[i] + 1 != d->perm[i + 1])
35908 /* See if words move the same in both lanes. If not,
35909 vpblendvb must be used. */
35910 for (i = 0; i < 8; i++)
35911 if (d->perm[i] + 8 != d->perm[i + 8])
35913 /* Use vpblendvb. */
35914 for (i = 0; i < 32; ++i)
35915 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
35919 target = gen_lowpart (vmode, target);
35920 op0 = gen_lowpart (vmode, op0);
35921 op1 = gen_lowpart (vmode, op1);
35922 goto finish_pblendvb;
35925 /* Use vpblendw. */
35926 for (i = 0; i < 16; ++i)
35927 mask |= (d->perm[i] >= 16) << i;
35931 /* Use vpblendd. */
35932 for (i = 0; i < 8; ++i)
35933 mask |= (d->perm[i * 2] >= 16) << i;
35938 /* Use vpblendd. */
35939 for (i = 0; i < 4; ++i)
35940 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
35945 gcc_unreachable ();
35948 /* This matches five different patterns with the different modes. */
35949 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
35950 x = gen_rtx_SET (VOIDmode, target, x);
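/* Worked example of the immediate construction above (illustrative
   only).  For a V8HI blend with d->perm = { 0, 9, 2, 11, 4, 13, 6,
   15 }, elements with perm[i] >= nelt come from op1, so the loop
   produces mask = 0xaa (binary 10101010), the immediate operand of
   pblendw.  */
#if 0
#include <stdio.h>

int
main (void)
{
  static const unsigned char perm[8] = { 0, 9, 2, 11, 4, 13, 6, 15 };
  unsigned int i, nelt = 8, mask = 0;

  for (i = 0; i < nelt; ++i)
    mask |= (perm[i] >= nelt) << i;

  printf ("pblendw immediate: %#x\n", mask);    /* prints 0xaa */
  return 0;
}
#endif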
35956 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35957 in terms of the variable form of vpermilps.
35959 Note that we will have already failed the immediate input vpermilps,
35960 which requires that the high and low part shuffle be identical; the
35961 variable form doesn't require that. */
35964 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
35966 rtx rperm[8], vperm;
35969 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
35972 /* We can only permute within the 128-bit lane. */
35973 for (i = 0; i < 8; ++i)
35975 unsigned e = d->perm[i];
35976 if (i < 4 ? e >= 4 : e < 4)
35983 for (i = 0; i < 8; ++i)
35985 unsigned e = d->perm[i];
35987 /* Within each 128-bit lane, the elements of op0 are numbered
35988 from 0 and the elements of op1 are numbered from 4. */
35994 rperm[i] = GEN_INT (e);
35997 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
35998 vperm = force_reg (V8SImode, vperm);
35999 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
36004 /* Return true if permutation D can be performed as VMODE permutation
36008 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
36010 unsigned int i, j, chunk;
36012 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
36013 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
36014 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
36017 if (GET_MODE_NUNITS (vmode) >= d->nelt)
36020 chunk = d->nelt / GET_MODE_NUNITS (vmode);
36021 for (i = 0; i < d->nelt; i += chunk)
36022 if (d->perm[i] & (chunk - 1))
36025 for (j = 1; j < chunk; ++j)
36026 if (d->perm[i] + j != d->perm[i + j])
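/* A standalone sketch of the test above (illustrative only; the
   function name is made up).  A V16QI permutation is realizable in
   V4SI when every chunk of 4 byte indices starts 4-aligned and runs
   consecutively; e.g. { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }
   is the V4SI permutation { 1, 0, 3, 2 }.  */
#if 0
static int
valid_in_wider_mode (const unsigned char *perm, unsigned int nelt,
                     unsigned int wider_nelt)
{
  unsigned int chunk = nelt / wider_nelt, i, j;

  for (i = 0; i < nelt; i += chunk)
    {
      if (perm[i] & (chunk - 1))        /* Chunk must start aligned...  */
        return 0;
      for (j = 1; j < chunk; ++j)       /* ...and be consecutive.  */
        if (perm[i] + j != perm[i + j])
          return 0;
    }
  return 1;
}
#endif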
36032 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36033 in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128. */
36036 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
36038 unsigned i, nelt, eltsz, mask;
36039 unsigned char perm[32];
36040 enum machine_mode vmode = V16QImode;
36041 rtx rperm[32], vperm, target, op0, op1;
36045 if (d->op0 != d->op1)
36047 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
36050 && valid_perm_using_mode_p (V2TImode, d))
36055 /* Use vperm2i128 insn. The pattern uses
36056 V4DImode instead of V2TImode. */
36057 target = gen_lowpart (V4DImode, d->target);
36058 op0 = gen_lowpart (V4DImode, d->op0);
36059 op1 = gen_lowpart (V4DImode, d->op1);
36061 = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
| ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
36063 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
36071 if (GET_MODE_SIZE (d->vmode) == 16)
36076 else if (GET_MODE_SIZE (d->vmode) == 32)
36081 /* V4DImode should be already handled through
36082 expand_vselect by vpermq instruction. */
36083 gcc_assert (d->vmode != V4DImode);
36086 if (d->vmode == V8SImode
36087 || d->vmode == V16HImode
36088 || d->vmode == V32QImode)
36090 /* First see if vpermq can be used for
36091 V8SImode/V16HImode/V32QImode. */
36092 if (valid_perm_using_mode_p (V4DImode, d))
36094 for (i = 0; i < 4; i++)
36095 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
36098 return expand_vselect (gen_lowpart (V4DImode, d->target),
36099 gen_lowpart (V4DImode, d->op0),
36103 /* Next see if vpermd can be used. */
36104 if (valid_perm_using_mode_p (V8SImode, d))
36108 if (vmode == V32QImode)
/* vpshufb only works within 128-bit lanes; it is not
possible to shuffle bytes between the lanes.  */
36112 for (i = 0; i < nelt; ++i)
36113 if ((d->perm[i] ^ i) & (nelt / 2))
36124 if (vmode == V8SImode)
36125 for (i = 0; i < 8; ++i)
36126 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
36129 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36130 if (d->op0 != d->op1)
36131 mask = 2 * nelt - 1;
36132 else if (vmode == V16QImode)
36135 mask = nelt / 2 - 1;
36137 for (i = 0; i < nelt; ++i)
36139 unsigned j, e = d->perm[i] & mask;
36140 for (j = 0; j < eltsz; ++j)
36141 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
36145 vperm = gen_rtx_CONST_VECTOR (vmode,
36146 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
36147 vperm = force_reg (vmode, vperm);
36149 target = gen_lowpart (vmode, d->target);
36150 op0 = gen_lowpart (vmode, d->op0);
36151 if (d->op0 == d->op1)
36153 if (vmode == V16QImode)
36154 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
36155 else if (vmode == V32QImode)
36156 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
36158 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
36162 op1 = gen_lowpart (vmode, d->op1);
36163 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
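/* A standalone sketch of the control-vector construction above
   (illustrative only).  Each element index e expands into eltsz
   consecutive byte selectors e*eltsz .. e*eltsz+eltsz-1; e.g. a
   V8HI permutation { 3, 3, 0, 1, 7, 6, 5, 4 } becomes the byte
   control { 6,7, 6,7, 0,1, 2,3, 14,15, 12,13, 10,11, 8,9 }.  */
#if 0
static void
build_pshufb_control (const unsigned char *perm, unsigned int nelt,
                      unsigned int eltsz, unsigned char *ctrl)
{
  unsigned int i, j;

  for (i = 0; i < nelt; ++i)
    for (j = 0; j < eltsz; ++j)
      ctrl[i * eltsz + j] = perm[i] * eltsz + j;
}
#endif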
36169 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
36170 in a single instruction. */
36173 expand_vec_perm_1 (struct expand_vec_perm_d *d)
36175 unsigned i, nelt = d->nelt;
36176 unsigned char perm2[MAX_VECT_LEN];
36178 /* Check plain VEC_SELECT first, because AVX has instructions that could
36179 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
36180 input where SEL+CONCAT may not. */
36181 if (d->op0 == d->op1)
36183 int mask = nelt - 1;
36184 bool identity_perm = true;
36185 bool broadcast_perm = true;
36187 for (i = 0; i < nelt; i++)
36189 perm2[i] = d->perm[i] & mask;
36191 identity_perm = false;
36193 broadcast_perm = false;
36199 emit_move_insn (d->target, d->op0);
36202 else if (broadcast_perm && TARGET_AVX2)
36204 /* Use vpbroadcast{b,w,d}. */
36205 rtx op = d->op0, (*gen) (rtx, rtx) = NULL;
36209 op = gen_lowpart (V16QImode, op);
36210 gen = gen_avx2_pbroadcastv32qi;
36213 op = gen_lowpart (V8HImode, op);
36214 gen = gen_avx2_pbroadcastv16hi;
36217 op = gen_lowpart (V4SImode, op);
36218 gen = gen_avx2_pbroadcastv8si;
36221 gen = gen_avx2_pbroadcastv16qi;
36224 gen = gen_avx2_pbroadcastv8hi;
36226 /* For other modes prefer other shuffles this function creates. */
36232 emit_insn (gen (d->target, op));
36237 if (expand_vselect (d->target, d->op0, perm2, nelt))
36240 /* There are plenty of patterns in sse.md that are written for
36241 SEL+CONCAT and are not replicated for a single op. Perhaps
36242 that should be changed, to avoid the nastiness here. */
36244 /* Recognize interleave style patterns, which means incrementing
36245 every other permutation operand. */
36246 for (i = 0; i < nelt; i += 2)
36248 perm2[i] = d->perm[i] & mask;
36249 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
36251 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
36254 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
36257 for (i = 0; i < nelt; i += 4)
36259 perm2[i + 0] = d->perm[i + 0] & mask;
36260 perm2[i + 1] = d->perm[i + 1] & mask;
36261 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
36262 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
36265 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
36270 /* Finally, try the fully general two operand permute. */
36271 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
36274 /* Recognize interleave style patterns with reversed operands. */
36275 if (d->op0 != d->op1)
36277 for (i = 0; i < nelt; ++i)
36279 unsigned e = d->perm[i];
36287 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
36291 /* Try the SSE4.1 blend variable merge instructions. */
36292 if (expand_vec_perm_blend (d))
36295 /* Try one of the AVX vpermil variable permutations. */
36296 if (expand_vec_perm_vpermil (d))
36299 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
36300 vpshufb, vpermd or vpermq variable permutation. */
36301 if (expand_vec_perm_pshufb (d))
36307 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36308 in terms of a pair of pshuflw + pshufhw instructions. */
36311 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
36313 unsigned char perm2[MAX_VECT_LEN];
36317 if (d->vmode != V8HImode || d->op0 != d->op1)
36320 /* The two permutations only operate in 64-bit lanes. */
36321 for (i = 0; i < 4; ++i)
36322 if (d->perm[i] >= 4)
36324 for (i = 4; i < 8; ++i)
36325 if (d->perm[i] < 4)
36331 /* Emit the pshuflw. */
36332 memcpy (perm2, d->perm, 4);
36333 for (i = 4; i < 8; ++i)
36335 ok = expand_vselect (d->target, d->op0, perm2, 8);
36338 /* Emit the pshufhw. */
36339 memcpy (perm2 + 4, d->perm + 4, 4);
36340 for (i = 0; i < 4; ++i)
36342 ok = expand_vselect (d->target, d->target, perm2, 8);
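/* Worked example (illustrative only): perm = { 2, 1, 0, 3, 5, 4, 7,
   6 } keeps the low four words low and the high four high, so it
   splits into pshuflw { 2, 1, 0, 3 } followed by pshufhw { 5, 4, 7,
   6 }.  Each insn packs its four 2-bit indices into one immediate
   byte, as sketched below.  */
#if 0
static unsigned char
pshuf_imm (const unsigned char *idx)    /* four indices, each 0..3 */
{
  return (unsigned char) (idx[0] | idx[1] << 2 | idx[2] << 4 | idx[3] << 6);
}
/* pshuf_imm on { 2, 1, 0, 3 } yields 0xc6.  */
#endif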
36348 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36349 the permutation using the SSSE3 palignr instruction. This succeeds
36350 when all of the elements in PERM fit within one vector and we merely
36351 need to shift them down so that a single vector permutation has a
36352 chance to succeed. */
36355 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
36357 unsigned i, nelt = d->nelt;
36362 /* Even with AVX, palignr only operates on 128-bit vectors. */
36363 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
36366 min = nelt, max = 0;
36367 for (i = 0; i < nelt; ++i)
36369 unsigned e = d->perm[i];
36375 if (min == 0 || max - min >= nelt)
36378 /* Given that we have SSSE3, we know we'll be able to implement the
36379 single operand permutation after the palignr with pshufb. */
36383 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
36384 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
36385 gen_lowpart (TImode, d->op1),
36386 gen_lowpart (TImode, d->op0), shift));
36388 d->op0 = d->op1 = d->target;
36391 for (i = 0; i < nelt; ++i)
36393 unsigned e = d->perm[i] - min;
36399 /* Test for the degenerate case where the alignment by itself
36400 produces the desired permutation. */
36404 ok = expand_vec_perm_1 (d);
36410 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
36412 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36413 a two vector permutation into a single vector permutation by using
36414 an interleave operation to merge the vectors. */
36417 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
36419 struct expand_vec_perm_d dremap, dfinal;
36420 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
36421 unsigned HOST_WIDE_INT contents;
36422 unsigned char remap[2 * MAX_VECT_LEN];
36424 bool ok, same_halves = false;
36426 if (GET_MODE_SIZE (d->vmode) == 16)
36428 if (d->op0 == d->op1)
36431 else if (GET_MODE_SIZE (d->vmode) == 32)
36435 /* For 32-byte modes allow even d->op0 == d->op1.
36436 The lack of cross-lane shuffling in some instructions
36437 might prevent a single insn shuffle. */
36439 dfinal.testing_p = true;
/* If expand_vec_perm_interleave3 can expand this into
a 3-insn sequence, give up and let it be expanded that
way.  While that is one insn longer, it doesn't need a
memory operand, and in the common case where both the
interleave-low and interleave-high permutations with the
same operands are adjacent, the pair needs only 4 insns
after CSE.  */
36447 if (expand_vec_perm_interleave3 (&dfinal))
36453 /* Examine from whence the elements come. */
36455 for (i = 0; i < nelt; ++i)
36456 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
36458 memset (remap, 0xff, sizeof (remap));
36461 if (GET_MODE_SIZE (d->vmode) == 16)
36463 unsigned HOST_WIDE_INT h1, h2, h3, h4;
36465 /* Split the two input vectors into 4 halves. */
36466 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
/* If the elements are all from the low halves, use interleave low;
similarly, use interleave high for the high halves.  If the elements
are from mismatched halves, we can use shufps for V4SF/V4SI or do a
DImode shuffle.  */
36474 if ((contents & (h1 | h3)) == contents)
36477 for (i = 0; i < nelt2; ++i)
36480 remap[i + nelt] = i * 2 + 1;
36481 dremap.perm[i * 2] = i;
36482 dremap.perm[i * 2 + 1] = i + nelt;
36484 if (!TARGET_SSE2 && d->vmode == V4SImode)
36485 dremap.vmode = V4SFmode;
36487 else if ((contents & (h2 | h4)) == contents)
36490 for (i = 0; i < nelt2; ++i)
36492 remap[i + nelt2] = i * 2;
36493 remap[i + nelt + nelt2] = i * 2 + 1;
36494 dremap.perm[i * 2] = i + nelt2;
36495 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
36497 if (!TARGET_SSE2 && d->vmode == V4SImode)
36498 dremap.vmode = V4SFmode;
36500 else if ((contents & (h1 | h4)) == contents)
36503 for (i = 0; i < nelt2; ++i)
36506 remap[i + nelt + nelt2] = i + nelt2;
36507 dremap.perm[i] = i;
36508 dremap.perm[i + nelt2] = i + nelt + nelt2;
36513 dremap.vmode = V2DImode;
36515 dremap.perm[0] = 0;
36516 dremap.perm[1] = 3;
36519 else if ((contents & (h2 | h3)) == contents)
36522 for (i = 0; i < nelt2; ++i)
36524 remap[i + nelt2] = i;
36525 remap[i + nelt] = i + nelt2;
36526 dremap.perm[i] = i + nelt2;
36527 dremap.perm[i + nelt2] = i + nelt;
36532 dremap.vmode = V2DImode;
36534 dremap.perm[0] = 1;
36535 dremap.perm[1] = 2;
36543 unsigned int nelt4 = nelt / 4, nzcnt = 0;
36544 unsigned HOST_WIDE_INT q[8];
36545 unsigned int nonzero_halves[4];
36547 /* Split the two input vectors into 8 quarters. */
36548 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
36549 for (i = 1; i < 8; ++i)
36550 q[i] = q[0] << (nelt4 * i);
36551 for (i = 0; i < 4; ++i)
36552 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
36554 nonzero_halves[nzcnt] = i;
36560 gcc_assert (d->op0 == d->op1);
36561 nonzero_halves[1] = nonzero_halves[0];
36562 same_halves = true;
36564 else if (d->op0 == d->op1)
36566 gcc_assert (nonzero_halves[0] == 0);
36567 gcc_assert (nonzero_halves[1] == 1);
36572 if (d->perm[0] / nelt2 == nonzero_halves[1])
/* Attempt to increase the likelihood that dfinal
36575 shuffle will be intra-lane. */
36576 char tmph = nonzero_halves[0];
36577 nonzero_halves[0] = nonzero_halves[1];
36578 nonzero_halves[1] = tmph;
36581 /* vperm2f128 or vperm2i128. */
36582 for (i = 0; i < nelt2; ++i)
36584 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
36585 remap[i + nonzero_halves[0] * nelt2] = i;
36586 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
36587 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
36590 if (d->vmode != V8SFmode
36591 && d->vmode != V4DFmode
36592 && d->vmode != V8SImode)
36594 dremap.vmode = V8SImode;
36596 for (i = 0; i < 4; ++i)
36598 dremap.perm[i] = i + nonzero_halves[0] * 4;
36599 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
36603 else if (d->op0 == d->op1)
36605 else if (TARGET_AVX2
36606 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
36609 for (i = 0; i < nelt4; ++i)
36612 remap[i + nelt] = i * 2 + 1;
36613 remap[i + nelt2] = i * 2 + nelt2;
36614 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
36615 dremap.perm[i * 2] = i;
36616 dremap.perm[i * 2 + 1] = i + nelt;
36617 dremap.perm[i * 2 + nelt2] = i + nelt2;
36618 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
36621 else if (TARGET_AVX2
36622 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
36625 for (i = 0; i < nelt4; ++i)
36627 remap[i + nelt4] = i * 2;
36628 remap[i + nelt + nelt4] = i * 2 + 1;
36629 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
36630 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
36631 dremap.perm[i * 2] = i + nelt4;
36632 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
36633 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
36634 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
36641 /* Use the remapping array set up above to move the elements from their
36642 swizzled locations into their final destinations. */
36644 for (i = 0; i < nelt; ++i)
36646 unsigned e = remap[d->perm[i]];
36647 gcc_assert (e < nelt);
36648 /* If same_halves is true, both halves of the remapped vector are the
36649 same. Avoid cross-lane accesses if possible. */
36650 if (same_halves && i >= nelt2)
36652 gcc_assert (e < nelt2);
36653 dfinal.perm[i] = e + nelt2;
36656 dfinal.perm[i] = e;
36660 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
36661 dfinal.op1 = dfinal.op0;
36662 dremap.target = dfinal.op0;
36664 /* Test if the final remap can be done with a single insn. For V4SFmode or
36665 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
36667 ok = expand_vec_perm_1 (&dfinal);
36668 seq = get_insns ();
36677 if (dremap.vmode != dfinal.vmode)
36679 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
36680 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
36681 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
36684 ok = expand_vec_perm_1 (&dremap);
36691 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36692 a single vector cross-lane permutation into vpermq followed
36693 by any of the single insn permutations. */
36696 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
36698 struct expand_vec_perm_d dremap, dfinal;
36699 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
36700 unsigned contents[2];
36704 && (d->vmode == V32QImode || d->vmode == V16HImode)
36705 && d->op0 == d->op1))
36710 for (i = 0; i < nelt2; ++i)
36712 contents[0] |= 1u << (d->perm[i] / nelt4);
36713 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
36716 for (i = 0; i < 2; ++i)
36718 unsigned int cnt = 0;
36719 for (j = 0; j < 4; ++j)
36720 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
36728 dremap.vmode = V4DImode;
36730 dremap.target = gen_reg_rtx (V4DImode);
36731 dremap.op0 = gen_lowpart (V4DImode, d->op0);
36732 dremap.op1 = dremap.op0;
36733 for (i = 0; i < 2; ++i)
36735 unsigned int cnt = 0;
36736 for (j = 0; j < 4; ++j)
36737 if ((contents[i] & (1u << j)) != 0)
36738 dremap.perm[2 * i + cnt++] = j;
36739 for (; cnt < 2; ++cnt)
36740 dremap.perm[2 * i + cnt] = 0;
36744 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
36745 dfinal.op1 = dfinal.op0;
36746 for (i = 0, j = 0; i < nelt; ++i)
36750 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
36751 if ((d->perm[i] / nelt4) == dremap.perm[j])
36753 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
36754 dfinal.perm[i] |= nelt4;
36756 gcc_unreachable ();
36759 ok = expand_vec_perm_1 (&dremap);
36762 ok = expand_vec_perm_1 (&dfinal);
36768 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36769 a two vector permutation using 2 intra-lane interleave insns
36770 and cross-lane shuffle for 32-byte vectors. */
36773 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
36776 rtx (*gen) (rtx, rtx, rtx);
36778 if (d->op0 == d->op1)
36780 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
36782 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
36788 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
36790 for (i = 0; i < nelt; i += 2)
36791 if (d->perm[i] != d->perm[0] + i / 2
36792 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
36802 gen = gen_vec_interleave_highv32qi;
36804 gen = gen_vec_interleave_lowv32qi;
36808 gen = gen_vec_interleave_highv16hi;
36810 gen = gen_vec_interleave_lowv16hi;
36814 gen = gen_vec_interleave_highv8si;
36816 gen = gen_vec_interleave_lowv8si;
36820 gen = gen_vec_interleave_highv4di;
36822 gen = gen_vec_interleave_lowv4di;
36826 gen = gen_vec_interleave_highv8sf;
36828 gen = gen_vec_interleave_lowv8sf;
36832 gen = gen_vec_interleave_highv4df;
36834 gen = gen_vec_interleave_lowv4df;
36837 gcc_unreachable ();
36840 emit_insn (gen (d->target, d->op0, d->op1));
36844 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
36845 permutation with two pshufb insns and an ior. We should have already
36846 failed all two instruction sequences. */
36849 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
36851 rtx rperm[2][16], vperm, l, h, op, m128;
36852 unsigned int i, nelt, eltsz;
36854 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
36856 gcc_assert (d->op0 != d->op1);
36862 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36864 /* Generate two permutation masks. If the required element is within
36865 the given vector it is shuffled into the proper lane. If the required
36866 element is in the other vector, force a zero into the lane by setting
36867 bit 7 in the permutation mask. */
36868 m128 = GEN_INT (-128);
36869 for (i = 0; i < nelt; ++i)
36871 unsigned j, e = d->perm[i];
36872 unsigned which = (e >= nelt);
36876 for (j = 0; j < eltsz; ++j)
36878 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
36879 rperm[1-which][i*eltsz + j] = m128;
36883 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
36884 vperm = force_reg (V16QImode, vperm);
36886 l = gen_reg_rtx (V16QImode);
36887 op = gen_lowpart (V16QImode, d->op0);
36888 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
36890 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
36891 vperm = force_reg (V16QImode, vperm);
36893 h = gen_reg_rtx (V16QImode);
36894 op = gen_lowpart (V16QImode, d->op1);
36895 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
36897 op = gen_lowpart (V16QImode, d->target);
36898 emit_insn (gen_iorv16qi3 (op, l, h));
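/* A standalone sketch of the mask construction above, specialized
   to V16QI (illustrative only).  Each output byte comes from exactly
   one input; the other mask gets -128 (bit 7 set), which makes
   pshufb write a zero there, so the final por merges the halves.  */
#if 0
static void
build_two_pshufb_masks (const unsigned char *perm,  /* indices 0..31 */
                        signed char *mask0, signed char *mask1)
{
  unsigned int i;

  for (i = 0; i < 16; ++i)
    if (perm[i] < 16)
      {
        mask0[i] = (signed char) perm[i];   /* byte taken from op0 */
        mask1[i] = -128;                    /* zero in the op1 result */
      }
    else
      {
        mask0[i] = -128;
        mask1[i] = (signed char) (perm[i] - 16);
      }
}
#endif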
36903 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
36904 with two vpshufb insns, vpermq and vpor. We should have already failed
36905 all two or three instruction sequences. */
36908 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
36910 rtx rperm[2][32], vperm, l, h, hp, op, m128;
36911 unsigned int i, nelt, eltsz;
36914 || d->op0 != d->op1
36915 || (d->vmode != V32QImode && d->vmode != V16HImode))
36922 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
/* Generate two permutation masks.  If the required element is within
the same lane, it is shuffled in.  If the required element is from the
other lane, force a zero by setting bit 7 in the permutation mask.
In the other mask, an element is non-negative if it is requested from
the other lane, but it is also moved to the other lane, so that the
result of vpshufb can have its two V2TImode halves swapped.  */
36931 m128 = GEN_INT (-128);
36932 for (i = 0; i < nelt; ++i)
36934 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
36935 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
36937 for (j = 0; j < eltsz; ++j)
36939 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
36940 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
36944 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
36945 vperm = force_reg (V32QImode, vperm);
36947 h = gen_reg_rtx (V32QImode);
36948 op = gen_lowpart (V32QImode, d->op0);
36949 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
/* Swap the 128-bit lanes of h into hp.  */
36952 hp = gen_reg_rtx (V4DImode);
36953 op = gen_lowpart (V4DImode, h);
36954 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
36957 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
36958 vperm = force_reg (V32QImode, vperm);
36960 l = gen_reg_rtx (V32QImode);
36961 op = gen_lowpart (V32QImode, d->op0);
36962 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
36964 op = gen_lowpart (V32QImode, d->target);
36965 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
36970 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
36971 and extract-odd permutations of two V32QImode and V16QImode operand
36972 with two vpshufb insns, vpor and vpermq. We should have already
36973 failed all two or three instruction sequences. */
36976 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
36978 rtx rperm[2][32], vperm, l, h, ior, op, m128;
36979 unsigned int i, nelt, eltsz;
36982 || d->op0 == d->op1
36983 || (d->vmode != V32QImode && d->vmode != V16HImode))
36986 for (i = 0; i < d->nelt; ++i)
36987 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
36994 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36996 /* Generate two permutation masks. In the first permutation mask
36997 the first quarter will contain indexes for the first half
36998 of the op0, the second quarter will contain bit 7 set, third quarter
36999 will contain indexes for the second half of the op0 and the
37000 last quarter bit 7 set. In the second permutation mask
37001 the first quarter will contain bit 7 set, the second quarter
37002 indexes for the first half of the op1, the third quarter bit 7 set
37003 and last quarter indexes for the second half of the op1.
37004 I.e. the first mask e.g. for V32QImode extract even will be:
37005 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
37006 (all values masked with 0xf except for -128) and second mask
37007 for extract even will be
37008 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
37009 m128 = GEN_INT (-128);
37010 for (i = 0; i < nelt; ++i)
37012 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
37013 unsigned which = d->perm[i] >= nelt;
37014 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
37016 for (j = 0; j < eltsz; ++j)
37018 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
37019 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
37023 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
37024 vperm = force_reg (V32QImode, vperm);
37026 l = gen_reg_rtx (V32QImode);
37027 op = gen_lowpart (V32QImode, d->op0);
37028 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
37030 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
37031 vperm = force_reg (V32QImode, vperm);
37033 h = gen_reg_rtx (V32QImode);
37034 op = gen_lowpart (V32QImode, d->op1);
37035 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
37037 ior = gen_reg_rtx (V32QImode);
37038 emit_insn (gen_iorv32qi3 (ior, l, h));
37040 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
37041 op = gen_lowpart (V4DImode, d->target);
37042 ior = gen_lowpart (V4DImode, ior);
37043 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
37044 const1_rtx, GEN_INT (3)));
37049 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
37050 and extract-odd permutations. */
37053 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
37062 t1 = gen_reg_rtx (V4DFmode);
37063 t2 = gen_reg_rtx (V4DFmode);
37065 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
37066 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
37067 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
37069 /* Now an unpck[lh]pd will produce the result required. */
37071 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
37073 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
37079 int mask = odd ? 0xdd : 0x88;
37083 t1 = gen_reg_rtx (V8SFmode);
37084 t2 = gen_reg_rtx (V8SFmode);
37085 t3 = gen_reg_rtx (V8SFmode);
37087 /* Shuffle within the 128-bit lanes to produce:
37088 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
37089 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
37092 /* Shuffle the lanes around to produce:
37093 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
37094 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
37097 /* Shuffle within the 128-bit lanes to produce:
37098 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
37099 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
37101 /* Shuffle within the 128-bit lanes to produce:
37102 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
37103 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
37105 /* Shuffle the lanes around to produce:
37106 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
37107 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
37116 /* These are always directly implementable by expand_vec_perm_1. */
37117 gcc_unreachable ();
37121 return expand_vec_perm_pshufb2 (d);
37126 /* We need 2*log2(N)-1 operations to achieve odd/even
37127 with interleave. */
37128 t1 = gen_reg_rtx (V8HImode);
37129 t2 = gen_reg_rtx (V8HImode);
37130 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
37131 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
37132 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
37133 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
37135 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
37137 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
37144 return expand_vec_perm_pshufb2 (d);
37149 t1 = gen_reg_rtx (V16QImode);
37150 t2 = gen_reg_rtx (V16QImode);
37151 t3 = gen_reg_rtx (V16QImode);
37152 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
37153 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
37154 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
37155 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
37156 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
37157 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
37159 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
37161 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
37168 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
37173 struct expand_vec_perm_d d_copy = *d;
37174 d_copy.vmode = V4DFmode;
37175 d_copy.target = gen_lowpart (V4DFmode, d->target);
37176 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
37177 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
37178 return expand_vec_perm_even_odd_1 (&d_copy, odd);
37184 t1 = gen_reg_rtx (V4DImode);
37185 t2 = gen_reg_rtx (V4DImode);
37187 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
37188 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
37189 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
37191 /* Now an vpunpck[lh]qdq will produce the result required. */
37193 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
37195 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
37202 struct expand_vec_perm_d d_copy = *d;
37203 d_copy.vmode = V8SFmode;
37204 d_copy.target = gen_lowpart (V8SFmode, d->target);
37205 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
37206 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
37207 return expand_vec_perm_even_odd_1 (&d_copy, odd);
37213 t1 = gen_reg_rtx (V8SImode);
37214 t2 = gen_reg_rtx (V8SImode);
37216 /* Shuffle the lanes around into
37217 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
37218 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
37219 gen_lowpart (V4DImode, d->op0),
37220 gen_lowpart (V4DImode, d->op1),
37222 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
37223 gen_lowpart (V4DImode, d->op0),
37224 gen_lowpart (V4DImode, d->op1),
37227 /* Swap the 2nd and 3rd position in each lane into
37228 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
37229 emit_insn (gen_avx2_pshufdv3 (t1, t1,
37230 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
37231 emit_insn (gen_avx2_pshufdv3 (t2, t2,
37232 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
37234 /* Now an vpunpck[lh]qdq will produce
37235 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
37237 t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
37238 gen_lowpart (V4DImode, t1),
37239 gen_lowpart (V4DImode, t2));
37241 t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
37242 gen_lowpart (V4DImode, t1),
37243 gen_lowpart (V4DImode, t2));
37248 gcc_unreachable ();
37254 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
37255 extract-even and extract-odd permutations. */
37258 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
37260 unsigned i, odd, nelt = d->nelt;
37263 if (odd != 0 && odd != 1)
37266 for (i = 1; i < nelt; ++i)
37267 if (d->perm[i] != 2 * i + odd)
37270 return expand_vec_perm_even_odd_1 (d, odd);
37273 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
37274 permutations. We assume that expand_vec_perm_1 has already failed. */
37277 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
37279 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
37280 enum machine_mode vmode = d->vmode;
37281 unsigned char perm2[4];
37289 /* These are special-cased in sse.md so that we can optionally
37290 use the vbroadcast instruction. They expand to two insns
37291 if the input happens to be in a register. */
37292 gcc_unreachable ();
37298 /* These are always implementable using standard shuffle patterns. */
37299 gcc_unreachable ();
37303 /* These can be implemented via interleave. We save one insn by
37304 stopping once we have promoted to V4SImode and then use pshufd. */
37310 rtx (*gen) (rtx, rtx, rtx)
37311 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
37312 : gen_vec_interleave_lowv8hi;
37316 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
37317 : gen_vec_interleave_highv8hi;
37322 dest = gen_reg_rtx (vmode);
37323 emit_insn (gen (dest, op0, op0));
37324 vmode = get_mode_wider_vector (vmode);
37325 op0 = gen_lowpart (vmode, dest);
37327 while (vmode != V4SImode);
37329 memset (perm2, elt, 4);
37330 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
37338 /* For AVX2 broadcasts of the first element vpbroadcast* or
37339 vpermq should be used by expand_vec_perm_1. */
37340 gcc_assert (!TARGET_AVX2 || d->perm[0]);
37344 gcc_unreachable ();
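/* A standalone byte-level model of the V16QI interleave promotion
   above (illustrative only; the helper is made up).  Each step keeps
   the half that contains ELT and interleaves it with itself, doubling
   every element; after two steps a 32-bit element holds four copies
   of the byte, and a 4-way pshufd-style shuffle finishes the
   broadcast.  */
#if 0
#include <string.h>

static void
broadcast_byte_model (unsigned char *v, unsigned int elt)  /* 16 bytes */
{
  unsigned char t[16];
  unsigned int i, width, nelt2 = 8;

  for (width = 1; width < 4; width *= 2)
    {
      const unsigned char *half = v + (elt >= nelt2 ? nelt2 * width : 0);

      if (elt >= nelt2)                 /* Track ELT into the kept half.  */
        elt -= nelt2;
      for (i = 0; i < 16; ++i)          /* Interleave half with itself.  */
        t[i] = half[(i / width / 2) * width + i % width];
      memcpy (v, t, 16);
      nelt2 /= 2;
    }

  for (i = 0; i < 16; ++i)              /* pshufd { elt, elt, elt, elt } */
    t[i] = v[elt * 4 + i % 4];
  memcpy (v, t, 16);
}
#endif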
37348 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
37349 broadcast permutations. */
37352 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
37354 unsigned i, elt, nelt = d->nelt;
37356 if (d->op0 != d->op1)
37360 for (i = 1; i < nelt; ++i)
37361 if (d->perm[i] != elt)
37364 return expand_vec_perm_broadcast_1 (d);
37367 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
37368 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
37369 all the shorter instruction sequences. */
37372 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
37374 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
37375 unsigned int i, nelt, eltsz;
37379 || d->op0 == d->op1
37380 || (d->vmode != V32QImode && d->vmode != V16HImode))
37387 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
/* Generate 4 permutation masks.  If the required element is within
the same lane, it is shuffled in.  If the required element is from the
other lane, force a zero by setting bit 7 in the permutation mask.
In the other masks, an element is non-negative if it is requested from
the other lane, but it is also moved to the other lane, so that the
result of vpshufb can have its two V2TImode halves swapped.  */
37396 m128 = GEN_INT (-128);
37397 for (i = 0; i < 32; ++i)
37399 rperm[0][i] = m128;
37400 rperm[1][i] = m128;
37401 rperm[2][i] = m128;
37402 rperm[3][i] = m128;
37408 for (i = 0; i < nelt; ++i)
37410 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
37411 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
37412 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
37414 for (j = 0; j < eltsz; ++j)
37415 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
37416 used[which] = true;
37419 for (i = 0; i < 2; ++i)
37421 if (!used[2 * i + 1])
37426 vperm = gen_rtx_CONST_VECTOR (V32QImode,
37427 gen_rtvec_v (32, rperm[2 * i + 1]));
37428 vperm = force_reg (V32QImode, vperm);
37429 h[i] = gen_reg_rtx (V32QImode);
37430 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
37431 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
/* Swap the 128-bit lanes of h[X].  */
37435 for (i = 0; i < 2; ++i)
37437 if (h[i] == NULL_RTX)
37439 op = gen_reg_rtx (V4DImode);
37440 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
37441 const2_rtx, GEN_INT (3), const0_rtx,
37443 h[i] = gen_lowpart (V32QImode, op);
37446 for (i = 0; i < 2; ++i)
37453 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
37454 vperm = force_reg (V32QImode, vperm);
37455 l[i] = gen_reg_rtx (V32QImode);
37456 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
37457 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
37460 for (i = 0; i < 2; ++i)
37464 op = gen_reg_rtx (V32QImode);
37465 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
37472 gcc_assert (l[0] && l[1]);
37473 op = gen_lowpart (V32QImode, d->target);
37474 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
37478 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
37479 With all of the interface bits taken care of, perform the expansion
37480 in D and return true on success. */
37483 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
37485 /* Try a single instruction expansion. */
37486 if (expand_vec_perm_1 (d))
37489 /* Try sequences of two instructions. */
37491 if (expand_vec_perm_pshuflw_pshufhw (d))
37494 if (expand_vec_perm_palignr (d))
37497 if (expand_vec_perm_interleave2 (d))
37500 if (expand_vec_perm_broadcast (d))
37503 if (expand_vec_perm_vpermq_perm_1 (d))
37506 /* Try sequences of three instructions. */
37508 if (expand_vec_perm_pshufb2 (d))
37511 if (expand_vec_perm_interleave3 (d))
37514 /* Try sequences of four instructions. */
37516 if (expand_vec_perm_vpshufb2_vpermq (d))
37519 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
37522 /* ??? Look for narrow permutations whose element orderings would
37523 allow the promotion to a wider mode. */
37525 /* ??? Look for sequences of interleave or a wider permute that place
37526 the data into the correct lanes for a half-vector shuffle like
37527 pshuf[lh]w or vpermilps. */
37529 /* ??? Look for sequences of interleave that produce the desired results.
37530 The combinatorics of punpck[lh] get pretty ugly... */
37532 if (expand_vec_perm_even_odd (d))
37535 /* Even longer sequences. */
37536 if (expand_vec_perm_vpshufb4_vpermq2 (d))
37543 ix86_expand_vec_perm_const (rtx operands[4])
37545 struct expand_vec_perm_d d;
37546 unsigned char perm[MAX_VECT_LEN];
37547 int i, nelt, which;
37550 d.target = operands[0];
37551 d.op0 = operands[1];
37552 d.op1 = operands[2];
37555 d.vmode = GET_MODE (d.target);
37556 gcc_assert (VECTOR_MODE_P (d.vmode));
37557 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37558 d.testing_p = false;
37560 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
37561 gcc_assert (XVECLEN (sel, 0) == nelt);
37562 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
37564 for (i = which = 0; i < nelt; ++i)
37566 rtx e = XVECEXP (sel, 0, i);
37567 int ei = INTVAL (e) & (2 * nelt - 1);
37569 which |= (ei < nelt ? 1 : 2);
37580 if (!rtx_equal_p (d.op0, d.op1))
37583 /* The elements of PERM do not suggest that only the first operand
37584 is used, but both operands are identical. Allow easier matching
of the permutation by folding the permutation into the single
input vector.  */
37587 for (i = 0; i < nelt; ++i)
37588 if (d.perm[i] >= nelt)
37597 for (i = 0; i < nelt; ++i)
37603 if (ix86_expand_vec_perm_const_1 (&d))
37606 /* If the mask says both arguments are needed, but they are the same,
37607 the above tried to expand with d.op0 == d.op1. If that didn't work,
37608 retry with d.op0 != d.op1 as that is what testing has been done with. */
37609 if (which == 3 && d.op0 == d.op1)
37614 memcpy (d.perm, perm, sizeof (perm));
37615 d.op1 = gen_reg_rtx (d.vmode);
37617 ok = ix86_expand_vec_perm_const_1 (&d);
37618 seq = get_insns ();
37622 emit_move_insn (d.op1, d.op0);
37631 /* Implement targetm.vectorize.vec_perm_const_ok. */
37634 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
37635 const unsigned char *sel)
37637 struct expand_vec_perm_d d;
37638 unsigned int i, nelt, which;
37642 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37643 d.testing_p = true;
37645 /* Given sufficient ISA support we can just return true here
37646 for selected vector modes. */
37647 if (GET_MODE_SIZE (d.vmode) == 16)
37649 /* All implementable with a single vpperm insn. */
37652 /* All implementable with 2 pshufb + 1 ior. */
37655 /* All implementable with shufpd or unpck[lh]pd. */
/* Extract the values from the vector CST into the permutation
array in D.  */
37662 memcpy (d.perm, sel, nelt);
37663 for (i = which = 0; i < nelt; ++i)
37665 unsigned char e = d.perm[i];
37666 gcc_assert (e < 2 * nelt);
37667 which |= (e < nelt ? 1 : 2);
/* For all elements from the second vector, fold them to the first.  */
37672 for (i = 0; i < nelt; ++i)
37675 /* Check whether the mask can be applied to the vector type. */
37676 one_vec = (which != 3);
37678 /* Implementable with shufps or pshufd. */
37679 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
37682 /* Otherwise we have to go through the motions and see if we can
37683 figure out how to generate the requested permutation. */
37684 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
37685 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
37687 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
37690 ret = ix86_expand_vec_perm_const_1 (&d);
37697 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
37699 struct expand_vec_perm_d d;
37705 d.vmode = GET_MODE (targ);
37706 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37707 d.testing_p = false;
37709 for (i = 0; i < nelt; ++i)
37710 d.perm[i] = i * 2 + odd;
37712 /* We'll either be able to implement the permutation directly... */
37713 if (expand_vec_perm_1 (&d))
37716 /* ... or we use the special-case patterns. */
37717 expand_vec_perm_even_odd_1 (&d, odd);
37720 /* Expand an insert into a vector register through pinsr insn.
37721 Return true if successful. */
37724 ix86_expand_pinsr (rtx *operands)
37726 rtx dst = operands[0];
37727 rtx src = operands[3];
37729 unsigned int size = INTVAL (operands[1]);
37730 unsigned int pos = INTVAL (operands[2]);
37732 if (GET_CODE (dst) == SUBREG)
37734 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
37735 dst = SUBREG_REG (dst);
37738 if (GET_CODE (src) == SUBREG)
37739 src = SUBREG_REG (src);
37741 switch (GET_MODE (dst))
37748 enum machine_mode srcmode, dstmode;
37749 rtx (*pinsr)(rtx, rtx, rtx, rtx);
37751 srcmode = mode_for_size (size, MODE_INT, 0);
37756 if (!TARGET_SSE4_1)
37758 dstmode = V16QImode;
37759 pinsr = gen_sse4_1_pinsrb;
37765 dstmode = V8HImode;
37766 pinsr = gen_sse2_pinsrw;
37770 if (!TARGET_SSE4_1)
37772 dstmode = V4SImode;
37773 pinsr = gen_sse4_1_pinsrd;
37777 gcc_assert (TARGET_64BIT);
37778 if (!TARGET_SSE4_1)
37780 dstmode = V2DImode;
37781 pinsr = gen_sse4_1_pinsrq;
37788 dst = gen_lowpart (dstmode, dst);
37789 src = gen_lowpart (srcmode, src);
pos /= size;
emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
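/* Worked example (illustrative only): inserting a 16-bit field at
   bit position 32 selects srcmode = HImode and dstmode = V8HImode;
   pos /= size gives element 32 / 16 = 2, and the vec_merge-based
   pinsr pattern's selector operand is the one-hot element mask
   1 << 2, as sketched below (helper name made up).  */
#if 0
static unsigned int
pinsr_merge_mask (unsigned int size, unsigned int bitpos)
{
  return 1u << (bitpos / size);   /* one bit per destination element */
}
#endif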
/* This function returns the calling-ABI-specific va_list type node,
i.e. the va_list type appropriate for FNDECL.  */
37806 ix86_fn_abi_va_list (tree fndecl)
37809 return va_list_type_node;
37810 gcc_assert (fndecl != NULL_TREE);
37812 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
37813 return ms_va_list_type_node;
37815 return sysv_va_list_type_node;
37818 /* Returns the canonical va_list type specified by TYPE. If there
is no valid TYPE provided, it returns NULL_TREE.  */
37822 ix86_canonical_va_list_type (tree type)
37826 /* Resolve references and pointers to va_list type. */
37827 if (TREE_CODE (type) == MEM_REF)
37828 type = TREE_TYPE (type);
37829 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
37830 type = TREE_TYPE (type);
37831 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
37832 type = TREE_TYPE (type);
37834 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
37836 wtype = va_list_type_node;
37837 gcc_assert (wtype != NULL_TREE);
37839 if (TREE_CODE (wtype) == ARRAY_TYPE)
37841 /* If va_list is an array type, the argument may have decayed
37842 to a pointer type, e.g. by being passed to another function.
37843 In that case, unwrap both types so that we can compare the
37844 underlying records. */
37845 if (TREE_CODE (htype) == ARRAY_TYPE
37846 || POINTER_TYPE_P (htype))
37848 wtype = TREE_TYPE (wtype);
37849 htype = TREE_TYPE (htype);
37852 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37853 return va_list_type_node;
37854 wtype = sysv_va_list_type_node;
37855 gcc_assert (wtype != NULL_TREE);
37857 if (TREE_CODE (wtype) == ARRAY_TYPE)
37859 /* If va_list is an array type, the argument may have decayed
37860 to a pointer type, e.g. by being passed to another function.
37861 In that case, unwrap both types so that we can compare the
37862 underlying records. */
37863 if (TREE_CODE (htype) == ARRAY_TYPE
37864 || POINTER_TYPE_P (htype))
37866 wtype = TREE_TYPE (wtype);
37867 htype = TREE_TYPE (htype);
37870 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37871 return sysv_va_list_type_node;
37872 wtype = ms_va_list_type_node;
37873 gcc_assert (wtype != NULL_TREE);
37875 if (TREE_CODE (wtype) == ARRAY_TYPE)
37877 /* If va_list is an array type, the argument may have decayed
37878 to a pointer type, e.g. by being passed to another function.
37879 In that case, unwrap both types so that we can compare the
37880 underlying records. */
37881 if (TREE_CODE (htype) == ARRAY_TYPE
37882 || POINTER_TYPE_P (htype))
37884 wtype = TREE_TYPE (wtype);
37885 htype = TREE_TYPE (htype);
37888 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37889 return ms_va_list_type_node;
37892 return std_canonical_va_list_type (type);
37895 /* Iterate through the target-specific builtin types for va_list.
37896 IDX denotes the iterator, *PTREE is set to the result type of
37897 the va_list builtin, and *PNAME to its internal type.
37898 Returns zero if there is no element for this index, otherwise
37899 IDX should be increased upon the next call.
37900 Note, do not iterate a base builtin's name like __builtin_va_list.
37901 Used from c_common_nodes_and_builtins. */
37904 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
37914 *ptree = ms_va_list_type_node;
37915 *pname = "__builtin_ms_va_list";
37919 *ptree = sysv_va_list_type_node;
37920 *pname = "__builtin_sysv_va_list";
37928 #undef TARGET_SCHED_DISPATCH
37929 #define TARGET_SCHED_DISPATCH has_dispatch
37930 #undef TARGET_SCHED_DISPATCH_DO
37931 #define TARGET_SCHED_DISPATCH_DO do_dispatch
37932 #undef TARGET_SCHED_REASSOCIATION_WIDTH
37933 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
37935 /* The size of the dispatch window is the total number of bytes of
37936 object code allowed in a window. */
37937 #define DISPATCH_WINDOW_SIZE 16
37939 /* Number of dispatch windows considered for scheduling. */
37940 #define MAX_DISPATCH_WINDOWS 3
37942 /* Maximum number of instructions in a window. */
#define MAX_INSN 4
37945 /* Maximum number of immediate operands in a window. */
#define MAX_IMM 4
37948 /* Maximum number of immediate bits allowed in a window. */
37949 #define MAX_IMM_SIZE 128
37951 /* Maximum number of 32 bit immediates allowed in a window. */
37952 #define MAX_IMM_32 4
37954 /* Maximum number of 64 bit immediates allowed in a window. */
37955 #define MAX_IMM_64 2
37957 /* Maximum total of loads or prefetches allowed in a window. */
#define MAX_LOAD 2
37960 /* Maximum total of stores allowed in a window. */
37961 #define MAX_STORE 1

/* A large count for dispatch groups that never constrain a window by
   number; see the num_allowable_groups comment below. */
#define BIG 100
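/* Worked example of the limits above (illustrative): one 16-byte window
   may carry at most 128 bits of immediates, i.e. four 32-bit immediates
   (4 * 32 = 128) or two 64-bit ones (2 * 64 = 128); a mix such as one
   64-bit plus three 32-bit immediates (64 + 96 = 160 bits) already
   exceeds MAX_IMM_SIZE and is rejected by count_num_restricted below. */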
37967 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
37968 enum dispatch_group {
37983 /* Number of allowable groups in a dispatch window. It is an array
37984 indexed by dispatch_group enum. 100 is used as a big number,
37985 because the number of these kinds of operations does not have any
37986 effect in a dispatch window, but we need them for other reasons in
the table. */
37988 static unsigned int num_allowable_groups[disp_last] = {
37989 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
37992 char group_name[disp_last + 1][16] = {
37993 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
37994 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
37995 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
37998 /* Instruction path. */
38001 path_single, /* Single micro op. */
38002 path_double, /* Double micro op. */
38003 path_multi, /* Instructions with more than 2 micro ops. */
38007 /* sched_insn_info defines a window to the instructions scheduled in
38008 the basic block. It contains a pointer to the insn_info table and
38009 the instruction scheduled.
38011 Windows are allocated for each basic block and are linked
with each other. */
38013 typedef struct sched_insn_info_s {
38015 enum dispatch_group group;
38016 enum insn_path path;
38021 /* Linked list of dispatch windows. This is a two way list of
38022 dispatch windows of a basic block. It contains information about
38023 the number of uops in the window and the total number of
38024 instructions and of bytes in the object code for this dispatch
window. */
38026 typedef struct dispatch_windows_s {
38027 int num_insn; /* Number of insns in the window. */
38028 int num_uops; /* Number of uops in the window. */
38029 int window_size; /* Number of bytes in the window. */
38030 int window_num; /* Window number, 0 or 1. */
38031 int num_imm; /* Number of immediates in an insn. */
38032 int num_imm_32; /* Number of 32 bit immediates in an insn. */
38033 int num_imm_64; /* Number of 64 bit immediates in an insn. */
38034 int imm_size; /* Total immediates in the window. */
38035 int num_loads; /* Total memory loads in the window. */
38036 int num_stores; /* Total memory stores in the window. */
38037 int violation; /* Violation exists in window. */
38038 sched_insn_info *window; /* Pointer to the window. */
38039 struct dispatch_windows_s *next;
38040 struct dispatch_windows_s *prev;
38041 } dispatch_windows;
38043 /* Immediate values used in an insn. */
38044 typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;
38051 static dispatch_windows *dispatch_window_list;
38052 static dispatch_windows *dispatch_window_list1;
38054 /* Get dispatch group of insn. */
38056 static enum dispatch_group
38057 get_mem_group (rtx insn)
38059 enum attr_memory memory;
38061 if (INSN_CODE (insn) < 0)
38062 return disp_no_group;
38063 memory = get_attr_memory (insn);
38064 if (memory == MEMORY_STORE)
38067 if (memory == MEMORY_LOAD)
38070 if (memory == MEMORY_BOTH)
38071 return disp_load_store;
38073 return disp_no_group;
38076 /* Return true if insn is a compare instruction. */
38081 enum attr_type type;
38083 type = get_attr_type (insn);
38084 return (type == TYPE_TEST
38085 || type == TYPE_ICMP
38086 || type == TYPE_FCMP
38087 || GET_CODE (PATTERN (insn)) == COMPARE);
38090 /* Return true if a dispatch violation was encountered. */
38093 dispatch_violation (void)
38095 if (dispatch_window_list->next)
38096 return dispatch_window_list->next->violation;
38097 return dispatch_window_list->violation;
38100 /* Return true if insn is a branch instruction. */
38103 is_branch (rtx insn)
38105 return (CALL_P (insn) || JUMP_P (insn));
38108 /* Return true if insn is a prefetch instruction. */
38111 is_prefetch (rtx insn)
38113 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
38116 /* This function initializes a dispatch window and the list container holding a
38117 pointer to the window. */
38120 init_window (int window_num)
38123 dispatch_windows *new_list;
38125 if (window_num == 0)
38126 new_list = dispatch_window_list;
38128 new_list = dispatch_window_list1;
38130 new_list->num_insn = 0;
38131 new_list->num_uops = 0;
38132 new_list->window_size = 0;
38133 new_list->next = NULL;
38134 new_list->prev = NULL;
38135 new_list->window_num = window_num;
38136 new_list->num_imm = 0;
38137 new_list->num_imm_32 = 0;
38138 new_list->num_imm_64 = 0;
38139 new_list->imm_size = 0;
38140 new_list->num_loads = 0;
38141 new_list->num_stores = 0;
38142 new_list->violation = false;
38144 for (i = 0; i < MAX_INSN; i++)
38146 new_list->window[i].insn = NULL;
38147 new_list->window[i].group = disp_no_group;
38148 new_list->window[i].path = no_path;
38149 new_list->window[i].byte_len = 0;
38150 new_list->window[i].imm_bytes = 0;
38155 /* This function allocates and initializes a dispatch window and the
38156 list container holding a pointer to the window. */
38158 static dispatch_windows *
38159 allocate_window (void)
38161 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
38162 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
38167 /* This routine initializes the dispatch scheduling information. It
38168 initiates building dispatch scheduler tables and constructs the
38169 first dispatch window. */
38172 init_dispatch_sched (void)
38174 /* Allocate a dispatch list and a window. */
38175 dispatch_window_list = allocate_window ();
38176 dispatch_window_list1 = allocate_window ();
38181 /* This function returns true if a branch is detected. End of a basic block
38182 does not have to be a branch, but here we assume only branches end a
basic block. */
38186 is_end_basic_block (enum dispatch_group group)
38188 return group == disp_branch;
38191 /* This function is called when the end of a window's processing is reached. */
38194 process_end_window (void)
38196 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
38197 if (dispatch_window_list->next)
38199 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
38200 gcc_assert (dispatch_window_list->window_size
38201 + dispatch_window_list1->window_size <= 48);
38207 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
38208 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
38209 for 48 bytes of instructions. Note that these windows are not dispatch
38210 windows whose sizes are DISPATCH_WINDOW_SIZE. */
38212 static dispatch_windows *
38213 allocate_next_window (int window_num)
38215 if (window_num == 0)
38217 if (dispatch_window_list->next)
38220 return dispatch_window_list;
38223 dispatch_window_list->next = dispatch_window_list1;
38224 dispatch_window_list1->prev = dispatch_window_list;
38226 return dispatch_window_list1;
38229 /* Increment the number of immediate operands of an instruction. */
38232 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
38237 switch (GET_CODE (*in_rtx))
38242 (imm_values->imm)++;
38243 if (x86_64_immediate_operand (*in_rtx, SImode))
38244 (imm_values->imm32)++;
38246 (imm_values->imm64)++;
38250 (imm_values->imm)++;
38251 (imm_values->imm64)++;
38255 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
38257 (imm_values->imm)++;
38258 (imm_values->imm32)++;
38269 /* Compute number of immediate operands of an instruction. */
38272 find_constant (rtx in_rtx, imm_info *imm_values)
38274 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
38275 (rtx_function) find_constant_1, (void *) imm_values);
38278 /* Return total size of immediate operands of an instruction along with number
38279 of corresponding immediate-operands. It initializes its parameters to zero
38280 before calling FIND_CONSTANT.
38281 INSN is the input instruction. IMM is the total of immediates.
38282 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
bit immediates. */
38286 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
38288 imm_info imm_values = {0, 0, 0};
38290 find_constant (insn, &imm_values);
38291 *imm = imm_values.imm;
38292 *imm32 = imm_values.imm32;
38293 *imm64 = imm_values.imm64;
38294 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
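/* Illustrative sketch (hypothetical values, never built; INSN is assumed
   to be some insn rtx in scope): an insn carrying one 32-bit and one
   64-bit immediate reports two immediate operands and
   1 * 4 + 1 * 8 = 12 immediate bytes. */
#if 0
{
  int imm, imm32, imm64;
  int size = get_num_immediates (insn, &imm, &imm32, &imm64);
  /* Here imm == 2, imm32 == 1, imm64 == 1 and size == 12. */
}
#endif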
38297 /* This function indicates if an operand of an instruction is an
immediate. */
38301 has_immediate (rtx insn)
38303 int num_imm_operand;
38304 int num_imm32_operand;
38305 int num_imm64_operand;
38308 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
38309 &num_imm64_operand);
38313 /* Return single or double path for instructions. */
38315 static enum insn_path
38316 get_insn_path (rtx insn)
38318 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
38320 if ((int)path == 0)
38321 return path_single;
38323 if ((int)path == 1)
38324 return path_double;
38329 /* Return insn dispatch group. */
38331 static enum dispatch_group
38332 get_insn_group (rtx insn)
38334 enum dispatch_group group = get_mem_group (insn);
38338 if (is_branch (insn))
38339 return disp_branch;
38344 if (has_immediate (insn))
38347 if (is_prefetch (insn))
38348 return disp_prefetch;
38350 return disp_no_group;
38353 /* Count number of GROUP restricted instructions in a dispatch
38354 window WINDOW_LIST. */
38357 count_num_restricted (rtx insn, dispatch_windows *window_list)
38359 enum dispatch_group group = get_insn_group (insn);
38361 int num_imm_operand;
38362 int num_imm32_operand;
38363 int num_imm64_operand;
38365 if (group == disp_no_group)
38368 if (group == disp_imm)
38370 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
38371 &num_imm64_operand);
38372 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
38373 || num_imm_operand + window_list->num_imm > MAX_IMM
38374 || (num_imm32_operand > 0
38375 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
38376 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
38377 || (num_imm64_operand > 0
38378 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
38379 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
38380 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
38381 && num_imm64_operand > 0
38382 && ((window_list->num_imm_64 > 0
38383 && window_list->num_insn >= 2)
38384 || window_list->num_insn >= 3)))
38390 if ((group == disp_load_store
38391 && (window_list->num_loads >= MAX_LOAD
38392 || window_list->num_stores >= MAX_STORE))
38393 || ((group == disp_load
38394 || group == disp_prefetch)
38395 && window_list->num_loads >= MAX_LOAD)
38396 || (group == disp_store
38397 && window_list->num_stores >= MAX_STORE))
38403 /* This function returns true if insn satisfies dispatch rules on the
38404 last window scheduled. */
38407 fits_dispatch_window (rtx insn)
38409 dispatch_windows *window_list = dispatch_window_list;
38410 dispatch_windows *window_list_next = dispatch_window_list->next;
38411 unsigned int num_restrict;
38412 enum dispatch_group group = get_insn_group (insn);
38413 enum insn_path path = get_insn_path (insn);
38416 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
38417 instructions should be given the lowest priority in the
38418 scheduling process in Haifa scheduler to make sure they will be
38419 scheduled in the same dispatch window as the reference to them. */
38420 if (group == disp_jcc || group == disp_cmp)
38423 /* Check nonrestricted. */
38424 if (group == disp_no_group || group == disp_branch)
38427 /* Get last dispatch window. */
38428 if (window_list_next)
38429 window_list = window_list_next;
38431 if (window_list->window_num == 1)
38433 sum = window_list->prev->window_size + window_list->window_size;
38436 || (min_insn_size (insn) + sum) >= 48)
38437 /* Window 1 is full. Go for next window. */
38441 num_restrict = count_num_restricted (insn, window_list);
38443 if (num_restrict > num_allowable_groups[group])
38446 /* See if it fits in the first window. */
38447 if (window_list->window_num == 0)
38449 /* The first window should have only single and double path
uops. */
38451 if (path == path_double
38452 && (window_list->num_uops + 2) > MAX_INSN)
38454 else if (path != path_single)
38460 /* Add an instruction INSN with NUM_UOPS micro-operations to the
38461 dispatch window WINDOW_LIST. */
38464 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
38466 int byte_len = min_insn_size (insn);
38467 int num_insn = window_list->num_insn;
38469 sched_insn_info *window = window_list->window;
38470 enum dispatch_group group = get_insn_group (insn);
38471 enum insn_path path = get_insn_path (insn);
38472 int num_imm_operand;
38473 int num_imm32_operand;
38474 int num_imm64_operand;
38476 if (!window_list->violation && group != disp_cmp
38477 && !fits_dispatch_window (insn))
38478 window_list->violation = true;
38480 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
38481 &num_imm64_operand);
38483 /* Initialize window with new instruction. */
38484 window[num_insn].insn = insn;
38485 window[num_insn].byte_len = byte_len;
38486 window[num_insn].group = group;
38487 window[num_insn].path = path;
38488 window[num_insn].imm_bytes = imm_size;
38490 window_list->window_size += byte_len;
38491 window_list->num_insn = num_insn + 1;
38492 window_list->num_uops = window_list->num_uops + num_uops;
38493 window_list->imm_size += imm_size;
38494 window_list->num_imm += num_imm_operand;
38495 window_list->num_imm_32 += num_imm32_operand;
38496 window_list->num_imm_64 += num_imm64_operand;
38498 if (group == disp_store)
38499 window_list->num_stores += 1;
38500 else if (group == disp_load
38501 || group == disp_prefetch)
38502 window_list->num_loads += 1;
38503 else if (group == disp_load_store)
38505 window_list->num_stores += 1;
38506 window_list->num_loads += 1;
38510 /* Adds a scheduled instruction, INSN, to the current dispatch window.
38511 If the total bytes of instructions or the number of instructions in
38512 the window exceed the allowable limits, it allocates a new window. */
38515 add_to_dispatch_window (rtx insn)
38518 dispatch_windows *window_list;
38519 dispatch_windows *next_list;
38520 dispatch_windows *window0_list;
38521 enum insn_path path;
38522 enum dispatch_group insn_group;
38530 if (INSN_CODE (insn) < 0)
38533 byte_len = min_insn_size (insn);
38534 window_list = dispatch_window_list;
38535 next_list = window_list->next;
38536 path = get_insn_path (insn);
38537 insn_group = get_insn_group (insn);
38539 /* Get the last dispatch window. */
38541 window_list = dispatch_window_list->next;
38543 if (path == path_single)
38545 else if (path == path_double)
38548 insn_num_uops = (int) path;
38550 /* If current window is full, get a new window.
38551 Window number zero is full if MAX_INSN uops are scheduled in it.
38552 Window number one is full if window zero's bytes plus window
38553 one's bytes reach 32, if adding the new instruction's bytes
38554 would make the total greater than 48, or if it already has MAX_INSN
38555 instructions in it. */
38556 num_insn = window_list->num_insn;
38557 num_uops = window_list->num_uops;
38558 window_num = window_list->window_num;
38559 insn_fits = fits_dispatch_window (insn);
38561 if (num_insn >= MAX_INSN
38562 || num_uops + insn_num_uops > MAX_INSN
38565 window_num = ~window_num & 1;
38566 window_list = allocate_next_window (window_num);
38569 if (window_num == 0)
38571 add_insn_window (insn, window_list, insn_num_uops);
38572 if (window_list->num_insn >= MAX_INSN
38573 && insn_group == disp_branch)
38575 process_end_window ();
38579 else if (window_num == 1)
38581 window0_list = window_list->prev;
38582 sum = window0_list->window_size + window_list->window_size;
38584 || (byte_len + sum) >= 48)
38586 process_end_window ();
38587 window_list = dispatch_window_list;
38590 add_insn_window (insn, window_list, insn_num_uops);
38593 gcc_unreachable ();
38595 if (is_end_basic_block (insn_group))
38598 /* End of basic block is reached; do end-basic-block processing. */
38598 process_end_window ();
38603 /* Print the dispatch window, WINDOW_NUM, to FILE. */
38605 DEBUG_FUNCTION static void
38606 debug_dispatch_window_file (FILE *file, int window_num)
38608 dispatch_windows *list;
38611 if (window_num == 0)
38612 list = dispatch_window_list;
38614 list = dispatch_window_list1;
38616 fprintf (file, "Window #%d:\n", list->window_num);
38617 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
38618 list->num_insn, list->num_uops, list->window_size);
38619 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
38620 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
38622 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
38624 fprintf (file, " insn info:\n");
38626 for (i = 0; i < MAX_INSN; i++)
38628 if (!list->window[i].insn)
38630 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
38631 i, group_name[list->window[i].group],
38632 i, (void *)list->window[i].insn,
38633 i, list->window[i].path,
38634 i, list->window[i].byte_len,
38635 i, list->window[i].imm_bytes);
38639 /* Print to stdout a dispatch window. */
38641 DEBUG_FUNCTION void
38642 debug_dispatch_window (int window_num)
38644 debug_dispatch_window_file (stdout, window_num);
38647 /* Print INSN dispatch information to FILE. */
38649 DEBUG_FUNCTION static void
38650 debug_insn_dispatch_info_file (FILE *file, rtx insn)
38653 enum insn_path path;
38654 enum dispatch_group group;
38656 int num_imm_operand;
38657 int num_imm32_operand;
38658 int num_imm64_operand;
38660 if (INSN_CODE (insn) < 0)
38663 byte_len = min_insn_size (insn);
38664 path = get_insn_path (insn);
38665 group = get_insn_group (insn);
38666 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
38667 &num_imm64_operand);
38669 fprintf (file, " insn info:\n");
38670 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
38671 group_name[group], path, byte_len);
38672 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
38673 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
38676 /* Print to stdout the status of the ready list with respect to
38677 dispatch windows. */
38679 DEBUG_FUNCTION void
38680 debug_ready_dispatch (void)
38683 int no_ready = number_in_ready ();
38685 fprintf (stdout, "Number of ready: %d\n", no_ready);
38687 for (i = 0; i < no_ready; i++)
38688 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
38691 /* This routine is the driver of the dispatch scheduler. */
38694 do_dispatch (rtx insn, int mode)
38696 if (mode == DISPATCH_INIT)
38697 init_dispatch_sched ();
38698 else if (mode == ADD_TO_DISPATCH_WINDOW)
38699 add_to_dispatch_window (insn);
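/* Illustrative sketch (hypothetical driver loop, never built): the Haifa
   scheduler first initializes the dispatcher, then feeds it each committed
   insn through the TARGET_SCHED_DISPATCH_DO hook; FIRST stands in for the
   head of the scheduled insn stream. */
#if 0
{
  rtx insn;

  do_dispatch (NULL_RTX, DISPATCH_INIT);
  for (insn = first; insn; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn))
      do_dispatch (insn, ADD_TO_DISPATCH_WINDOW);
}
#endif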
38702 /* Return TRUE if Dispatch Scheduling is supported. */
38705 has_dispatch (rtx insn, int action)
38707 if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
38708 && flag_dispatch_scheduler)
38714 case IS_DISPATCH_ON:
38719 return is_cmp (insn);
38721 case DISPATCH_VIOLATION:
38722 return dispatch_violation ();
38724 case FITS_DISPATCH_WINDOW:
38725 return fits_dispatch_window (insn);
38731 /* Implementation of reassociation_width target hook used by
38732 reassoc phase to identify parallelism level in reassociated
38733 tree. The statement's tree_code is passed in OPC; the arguments'
type is passed in MODE.
38736 Currently parallel reassociation is enabled for Atom
38737 processors only and we set reassociation width to be 2
38738 because Atom may issue up to 2 instructions per cycle.
38740 Return value should be fixed if parallel reassociation is
38741 enabled for other processors. */
38744 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
38745 enum machine_mode mode)
38749 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
38751 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
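/* Worked example of what a reassociation width of 2 buys (illustrative):
   the reassoc pass can rewrite the serial chain
       (((a + b) + c) + d)      -- depth 3, one add per cycle
   into
       ((a + b) + (c + d))      -- depth 2, two independent adds
   so a 2-issue core such as Atom can execute a + b and c + d in the same
   cycle. */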
38757 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
38758 place emms and femms instructions. */
38760 static enum machine_mode
38761 ix86_preferred_simd_mode (enum machine_mode mode)
38769 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
38771 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
38773 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
38775 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
38778 if (TARGET_AVX && !TARGET_PREFER_AVX128)
38784 if (!TARGET_VECTORIZE_DOUBLE)
38786 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
38788 else if (TARGET_SSE2)
38797 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
vectors. */
38800 static unsigned int
38801 ix86_autovectorize_vector_sizes (void)
38803 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
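/* Illustrative sketch (hypothetical caller, never built): the vectorizer
   interprets the returned value as a bit mask of vector sizes in bytes
   and retries failed loops with each size; zero means "preferred SIMD
   mode only". */
#if 0
{
  unsigned int sizes = ix86_autovectorize_vector_sizes ();

  if (sizes & 32)
    ;  /* try 256-bit vectors first */
  if (sizes & 16)
    ;  /* then fall back to 128-bit vectors */
}
#endif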
38806 /* Initialize the GCC target structure. */
38807 #undef TARGET_RETURN_IN_MEMORY
38808 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
38810 #undef TARGET_LEGITIMIZE_ADDRESS
38811 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
38813 #undef TARGET_ATTRIBUTE_TABLE
38814 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
38815 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
38816 # undef TARGET_MERGE_DECL_ATTRIBUTES
38817 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
38820 #undef TARGET_COMP_TYPE_ATTRIBUTES
38821 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
38823 #undef TARGET_INIT_BUILTINS
38824 #define TARGET_INIT_BUILTINS ix86_init_builtins
38825 #undef TARGET_BUILTIN_DECL
38826 #define TARGET_BUILTIN_DECL ix86_builtin_decl
38827 #undef TARGET_EXPAND_BUILTIN
38828 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
38830 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
38831 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
38832 ix86_builtin_vectorized_function
38834 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
38835 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
38837 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
38838 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
38840 #undef TARGET_VECTORIZE_BUILTIN_GATHER
38841 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
38843 #undef TARGET_BUILTIN_RECIPROCAL
38844 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
38846 #undef TARGET_ASM_FUNCTION_EPILOGUE
38847 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
38849 #undef TARGET_ENCODE_SECTION_INFO
38850 #ifndef SUBTARGET_ENCODE_SECTION_INFO
38851 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
38853 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
38856 #undef TARGET_ASM_OPEN_PAREN
38857 #define TARGET_ASM_OPEN_PAREN ""
38858 #undef TARGET_ASM_CLOSE_PAREN
38859 #define TARGET_ASM_CLOSE_PAREN ""
38861 #undef TARGET_ASM_BYTE_OP
38862 #define TARGET_ASM_BYTE_OP ASM_BYTE
38864 #undef TARGET_ASM_ALIGNED_HI_OP
38865 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
38866 #undef TARGET_ASM_ALIGNED_SI_OP
38867 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
38869 #undef TARGET_ASM_ALIGNED_DI_OP
38870 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
38873 #undef TARGET_PROFILE_BEFORE_PROLOGUE
38874 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
38876 #undef TARGET_ASM_UNALIGNED_HI_OP
38877 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
38878 #undef TARGET_ASM_UNALIGNED_SI_OP
38879 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
38880 #undef TARGET_ASM_UNALIGNED_DI_OP
38881 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
38883 #undef TARGET_PRINT_OPERAND
38884 #define TARGET_PRINT_OPERAND ix86_print_operand
38885 #undef TARGET_PRINT_OPERAND_ADDRESS
38886 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
38887 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
38888 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
38889 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
38890 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
38892 #undef TARGET_SCHED_INIT_GLOBAL
38893 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
38894 #undef TARGET_SCHED_ADJUST_COST
38895 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
38896 #undef TARGET_SCHED_ISSUE_RATE
38897 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
38898 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
38899 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
38900 ia32_multipass_dfa_lookahead
38902 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
38903 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
38906 #undef TARGET_HAVE_TLS
38907 #define TARGET_HAVE_TLS true
38909 #undef TARGET_CANNOT_FORCE_CONST_MEM
38910 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
38911 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
38912 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
38914 #undef TARGET_DELEGITIMIZE_ADDRESS
38915 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
38917 #undef TARGET_MS_BITFIELD_LAYOUT_P
38918 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
38921 #undef TARGET_BINDS_LOCAL_P
38922 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
38924 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
38925 #undef TARGET_BINDS_LOCAL_P
38926 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
38929 #undef TARGET_ASM_OUTPUT_MI_THUNK
38930 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
38931 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
38932 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
38934 #undef TARGET_ASM_FILE_START
38935 #define TARGET_ASM_FILE_START x86_file_start
38937 #undef TARGET_OPTION_OVERRIDE
38938 #define TARGET_OPTION_OVERRIDE ix86_option_override
38940 #undef TARGET_REGISTER_MOVE_COST
38941 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
38942 #undef TARGET_MEMORY_MOVE_COST
38943 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
38944 #undef TARGET_RTX_COSTS
38945 #define TARGET_RTX_COSTS ix86_rtx_costs
38946 #undef TARGET_ADDRESS_COST
38947 #define TARGET_ADDRESS_COST ix86_address_cost
38949 #undef TARGET_FIXED_CONDITION_CODE_REGS
38950 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
38951 #undef TARGET_CC_MODES_COMPATIBLE
38952 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
38954 #undef TARGET_MACHINE_DEPENDENT_REORG
38955 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
38957 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
38958 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
38960 #undef TARGET_BUILD_BUILTIN_VA_LIST
38961 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
38963 #undef TARGET_ENUM_VA_LIST_P
38964 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
38966 #undef TARGET_FN_ABI_VA_LIST
38967 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
38969 #undef TARGET_CANONICAL_VA_LIST_TYPE
38970 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
38972 #undef TARGET_EXPAND_BUILTIN_VA_START
38973 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
38975 #undef TARGET_MD_ASM_CLOBBERS
38976 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
38978 #undef TARGET_PROMOTE_PROTOTYPES
38979 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
38980 #undef TARGET_STRUCT_VALUE_RTX
38981 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
38982 #undef TARGET_SETUP_INCOMING_VARARGS
38983 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
38984 #undef TARGET_MUST_PASS_IN_STACK
38985 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
38986 #undef TARGET_FUNCTION_ARG_ADVANCE
38987 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
38988 #undef TARGET_FUNCTION_ARG
38989 #define TARGET_FUNCTION_ARG ix86_function_arg
38990 #undef TARGET_FUNCTION_ARG_BOUNDARY
38991 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
38992 #undef TARGET_PASS_BY_REFERENCE
38993 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
38994 #undef TARGET_INTERNAL_ARG_POINTER
38995 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
38996 #undef TARGET_UPDATE_STACK_BOUNDARY
38997 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
38998 #undef TARGET_GET_DRAP_RTX
38999 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
39000 #undef TARGET_STRICT_ARGUMENT_NAMING
39001 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
39002 #undef TARGET_STATIC_CHAIN
39003 #define TARGET_STATIC_CHAIN ix86_static_chain
39004 #undef TARGET_TRAMPOLINE_INIT
39005 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
39006 #undef TARGET_RETURN_POPS_ARGS
39007 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
39009 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
39010 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
39012 #undef TARGET_SCALAR_MODE_SUPPORTED_P
39013 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
39015 #undef TARGET_VECTOR_MODE_SUPPORTED_P
39016 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
39018 #undef TARGET_C_MODE_FOR_SUFFIX
39019 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
39022 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
39023 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
39026 #ifdef SUBTARGET_INSERT_ATTRIBUTES
39027 #undef TARGET_INSERT_ATTRIBUTES
39028 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
39031 #undef TARGET_MANGLE_TYPE
39032 #define TARGET_MANGLE_TYPE ix86_mangle_type
39035 #undef TARGET_STACK_PROTECT_FAIL
39036 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
39039 #undef TARGET_FUNCTION_VALUE
39040 #define TARGET_FUNCTION_VALUE ix86_function_value
39042 #undef TARGET_FUNCTION_VALUE_REGNO_P
39043 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
39045 #undef TARGET_PROMOTE_FUNCTION_MODE
39046 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
39048 #undef TARGET_INSTANTIATE_DECLS
39049 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
39051 #undef TARGET_SECONDARY_RELOAD
39052 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
39054 #undef TARGET_CLASS_MAX_NREGS
39055 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
39057 #undef TARGET_PREFERRED_RELOAD_CLASS
39058 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
39059 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
39060 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
39061 #undef TARGET_CLASS_LIKELY_SPILLED_P
39062 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
39064 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
39065 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
39066 ix86_builtin_vectorization_cost
39067 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
39068 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
39069 ix86_vectorize_vec_perm_const_ok
39070 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
39071 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
39072 ix86_preferred_simd_mode
39073 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
39074 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
39075 ix86_autovectorize_vector_sizes
39077 #undef TARGET_SET_CURRENT_FUNCTION
39078 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
39080 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
39081 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
39083 #undef TARGET_OPTION_SAVE
39084 #define TARGET_OPTION_SAVE ix86_function_specific_save
39086 #undef TARGET_OPTION_RESTORE
39087 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
39089 #undef TARGET_OPTION_PRINT
39090 #define TARGET_OPTION_PRINT ix86_function_specific_print
39092 #undef TARGET_CAN_INLINE_P
39093 #define TARGET_CAN_INLINE_P ix86_can_inline_p
39095 #undef TARGET_EXPAND_TO_RTL_HOOK
39096 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
39098 #undef TARGET_LEGITIMATE_ADDRESS_P
39099 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
39101 #undef TARGET_LEGITIMATE_CONSTANT_P
39102 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
39104 #undef TARGET_FRAME_POINTER_REQUIRED
39105 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
39107 #undef TARGET_CAN_ELIMINATE
39108 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
39110 #undef TARGET_EXTRA_LIVE_ON_ENTRY
39111 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
39113 #undef TARGET_ASM_CODE_END
39114 #define TARGET_ASM_CODE_END ix86_code_end
39116 #undef TARGET_CONDITIONAL_REGISTER_USAGE
39117 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
39120 #undef TARGET_INIT_LIBFUNCS
39121 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
39124 struct gcc_target targetm = TARGET_INITIALIZER;
39126 #include "gt-i386.h"