/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};
typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B)   ((block_info) (B)->aux)
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee neither returns nor passes 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
        = (enum upper_128bits_state *) data;
      *state = used;
    }
}
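/* Illustrative sketch of how the callback above is driven (the actual
   call site is in move_or_delete_vzeroupper_2 below):

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);

   After note_stores walks every store in the pattern, STATE is USED
   iff some store referenced a 256bit AVX register.  */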
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is the state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
                             enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
                 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
                 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              state = unused;
              unchanged = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (state != used)
            {
              note_stores (pat, check_avx256_stores, &state);
              if (state == used)
                unchanged = false;
            }
          continue;
        }

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
          if (avx256 == callee_return_avx256)
            {
              state = used;
              unchanged = false;
            }

          /* Remove unnecessary vzeroupper since upper 128bits are
             cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          /* Set state to UNUSED if callee doesn't return 256bit AVX
             register.  */
          if (avx256 != callee_return_pass_avx256)
            state = unused;

          if (avx256 == callee_return_pass_avx256
              || avx256 == callee_pass_avx256)
            {
              /* Must remove vzeroupper since callee passes in 256bit
                 AVX register.  */
              if (dump_file)
                {
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
              delete_insn (insn);
            }
          else
            {
              vzeroupper_insn = insn;
              unchanged = false;
            }
        }
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
             bb->index, unchanged ? "unchanged" : "changed",
             state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
             block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
        continue;
      switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
          if (!unknown_is_unused)
            seen_unknown = true;
          break;
        case unused:
          break;
        case used:
          state = used;
          break;
        }
    }

  if (seen_unknown)
    state = unknown;

  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
        cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
        move_or_delete_vzeroupper_1 (bb, false);
        fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
        {
          bb = (basic_block) fibheap_extract_min (worklist);
          RESET_BIT (in_worklist, bb->index);
          gcc_assert (!TEST_BIT (visited, bb->index));
          if (!TEST_BIT (visited, bb->index))
            {
              edge_iterator ei;

              SET_BIT (visited, bb->index);

              if (move_or_delete_vzeroupper_1 (bb, false))
                FOR_EACH_EDGE (e, ei, bb->succs)
                  {
                    if (e->dest == EXIT_BLOCK_PTR
                        || BLOCK_INFO (e->dest)->processed)
                      continue;

                    if (TEST_BIT (visited, e->dest->index))
                      {
                        if (!TEST_BIT (in_pending, e->dest->index))
                          {
                            /* Send E->DEST to next round.  */
                            SET_BIT (in_pending, e->dest->index);
                            fibheap_insert (pending,
                                            bb_order[e->dest->index],
                                            e->dest);
                          }
                      }
                    else if (!TEST_BIT (in_worklist, e->dest->index))
                      {
                        /* Add E->DEST to current round.  */
                        SET_BIT (in_worklist, e->dest->index);
                        fibheap_insert (worklist, bb_order[e->dest->index],
                                        e->dest);
                      }
                  }
            }
        }

      if (!cfun->machine->rescan_vzeroupper_p)
        break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
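/* Illustrative example: MODE_INDEX (SImode) is 2, so an expression such
   as ix86_cost->divide[MODE_INDEX (SImode)] picks the SImode entry of
   the five-element divide cost arrays in the tables below; any mode
   other than QI/HI/SI/DImode falls through to index 4, the "other"
   slot.  */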
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
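/* Worked example of that assumption: since COSTS_N_INSNS (N) expands to
   (N) * 4, a 2-byte addition costs COSTS_N_BYTES (2) == 4 ==
   COSTS_N_INSNS (1), so byte counts and instruction counts land on the
   same scale when tuning for size.  */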
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
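/* How to read the stringop entries in the cost tables below
   (illustrative): an initializer such as
   {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}} means "for a
   block of unknown size use a libcall; for known sizes use
   rep_prefix_4_byte up to 256 bytes, then a libcall", with max == -1
   marking the final, unbounded range.  Each table carries a pair of
   such descriptors (32bit and 64bit) for memcpy and for memset;
   DUMMY_STRINGOP_ALGS fills the 64bit slot for 32bit-only tunings.  */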
static const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),            /* cost of an add instruction */
  COSTS_N_BYTES (3),            /* cost of a lea instruction */
  COSTS_N_BYTES (2),            /* variable shift costs */
  COSTS_N_BYTES (3),            /* constant shift costs */
  {COSTS_N_BYTES (3),           /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),           /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  COSTS_N_BYTES (3),            /* cost of movsx */
  COSTS_N_BYTES (3),            /* cost of movzx */
  0,                            /* "large" insn */
  2,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {2, 2, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 2},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {2, 2, 2},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  3,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {3, 3},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  3,                            /* cost of moving SSE register */
  {3, 3, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {3, 3, 3},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_BYTES (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),            /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),            /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),            /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  1,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  1,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (6),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),           /* HI */
   COSTS_N_INSNS (6),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  COSTS_N_INSNS (1),            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (23),          /* SI */
   COSTS_N_INSNS (23),          /* DI */
   COSTS_N_INSNS (23)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  3,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (23),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),          /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (12),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),          /* HI */
   COSTS_N_INSNS (12),          /* SI */
   COSTS_N_INSNS (12),          /* DI */
   COSTS_N_INSNS (12)},         /* other */
  1,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),          /* HI */
   COSTS_N_INSNS (40),          /* SI */
   COSTS_N_INSNS (40),          /* DI */
   COSTS_N_INSNS (40)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  3,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  4,                            /* size of l1 cache.  486 has 8kB cache
                                   shared for code and data, so 4kB is
                                   not really precise.  */
  4,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),           /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (11),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),          /* HI */
   COSTS_N_INSNS (11),          /* SI */
   COSTS_N_INSNS (11),          /* DI */
   COSTS_N_INSNS (11)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),          /* HI */
   COSTS_N_INSNS (25),          /* SI */
   COSTS_N_INSNS (25),          /* DI */
   COSTS_N_INSNS (25)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* MOVE_RATIO */
  6,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  8,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  8,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (4)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),          /* HI */
   COSTS_N_INSNS (17),          /* SI */
   COSTS_N_INSNS (17),          /* DI */
   COSTS_N_INSNS (17)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache */
  32,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (2),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (7),           /* SI */
   COSTS_N_INSNS (7),           /* DI */
   COSTS_N_INSNS (7)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (39),          /* SI */
   COSTS_N_INSNS (39),          /* DI */
   COSTS_N_INSNS (39)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* MOVE_RATIO */
  1,                            /* cost for loading QImode using movzbl */
  {1, 1, 1},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {1, 1, 1},                    /* cost of storing integer registers */
  1,                            /* cost of reg,reg fld/fst */
  {1, 1, 1},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 6, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */

  1,                            /* cost of moving MMX register */
  {1, 1},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {1, 1},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  1,                            /* cost of moving SSE register */
  {1, 1, 1},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {1, 1, 1},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  1,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  128,                          /* size of l2 cache.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (3),           /* DI */
   COSTS_N_INSNS (3)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),          /* HI */
   COSTS_N_INSNS (18),          /* SI */
   COSTS_N_INSNS (18),          /* DI */
   COSTS_N_INSNS (18)},         /* other */
  COSTS_N_INSNS (2),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* MOVE_RATIO */
  3,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  6,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  32,                           /* size of l2 cache.  Some models
                                   have integrated l2 cache, but
                                   optimizing for k6 is not important
                                   enough to worry about that.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (5),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),           /* HI */
   COSTS_N_INSNS (5),           /* SI */
   COSTS_N_INSNS (5),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  5,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 3, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  5,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  3,                            /* vec_unalign_load_cost.  */
  3,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  2,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                    MOVD reg32, xmmreg Double FADD 3 */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                    MOVD reg32, xmmreg Double FADD 3 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                    MOVD reg32, xmmreg Double FADD 3 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */

  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                    MOVD reg32, xmmreg Double FADD 3 */
  32,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (3),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (4),            /* constant shift costs */
  {COSTS_N_INSNS (15),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),          /* HI */
   COSTS_N_INSNS (15),          /* SI */
   COSTS_N_INSNS (15),          /* DI */
   COSTS_N_INSNS (15)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),          /* HI */
   COSTS_N_INSNS (56),          /* SI */
   COSTS_N_INSNS (56),          /* DI */
   COSTS_N_INSNS (56)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  6,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  12,                           /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  10,                           /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (5),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (10),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),          /* HI */
   COSTS_N_INSNS (10),          /* SI */
   COSTS_N_INSNS (10),          /* DI */
   COSTS_N_INSNS (10)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),          /* HI */
   COSTS_N_INSNS (66),          /* SI */
   COSTS_N_INSNS (66),          /* DI */
   COSTS_N_INSNS (66)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  3,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  6,                            /* cost of moving MMX register */
  {12, 12},                     /* cost of loading MMX registers
                                   in SImode and DImode */
  {12, 12},                     /* cost of storing MMX registers
                                   in SImode and DImode */
  6,                            /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {12, 12, 12},                 /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  8,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  1024,                         /* size of l2 cache.  */
  128,                          /* size of prefetch block */
  8,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
1656 struct processor_costs atom_cost = {
1657 COSTS_N_INSNS (1), /* cost of an add instruction */
1658 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1659 COSTS_N_INSNS (1), /* variable shift costs */
1660 COSTS_N_INSNS (1), /* constant shift costs */
1661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1662 COSTS_N_INSNS (4), /* HI */
1663 COSTS_N_INSNS (3), /* SI */
1664 COSTS_N_INSNS (4), /* DI */
1665 COSTS_N_INSNS (2)}, /* other */
1666 0, /* cost of multiply per each bit set */
1667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1668 COSTS_N_INSNS (26), /* HI */
1669 COSTS_N_INSNS (42), /* SI */
1670 COSTS_N_INSNS (74), /* DI */
1671 COSTS_N_INSNS (74)}, /* other */
1672 COSTS_N_INSNS (1), /* cost of movsx */
1673 COSTS_N_INSNS (1), /* cost of movzx */
1674 8, /* "large" insn */
1675 17, /* MOVE_RATIO */
1676 4, /* cost for loading QImode using movzbl */
1677 {4, 4, 4}, /* cost of loading integer registers
1678 in QImode, HImode and SImode.
1679 Relative to reg-reg move (2). */
1680 {4, 4, 4}, /* cost of storing integer registers */
1681 4, /* cost of reg,reg fld/fst */
1682 {12, 12, 12}, /* cost of loading fp registers
1683 in SFmode, DFmode and XFmode */
1684 {6, 6, 8}, /* cost of storing fp registers
1685 in SFmode, DFmode and XFmode */
1686 2, /* cost of moving MMX register */
1687 {8, 8}, /* cost of loading MMX registers
1688 in SImode and DImode */
1689 {8, 8}, /* cost of storing MMX registers
1690 in SImode and DImode */
1691 2, /* cost of moving SSE register */
1692 {8, 8, 8}, /* cost of loading SSE registers
1693 in SImode, DImode and TImode */
1694 {8, 8, 8}, /* cost of storing SSE registers
1695 in SImode, DImode and TImode */
1696 5, /* MMX or SSE register to integer */
1697 32, /* size of l1 cache. */
1698 256, /* size of l2 cache. */
1699 64, /* size of prefetch block */
1700 6, /* number of parallel prefetches */
1701 3, /* Branch cost */
1702 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1703 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1704 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1705 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1706 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1707 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1708 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1709 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1710 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1711 {{libcall, {{8, loop}, {15, unrolled_loop},
1712 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1713 {libcall, {{24, loop}, {32, unrolled_loop},
1714 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1715 1, /* scalar_stmt_cost. */
1716 1, /* scalar load_cost. */
1717 1, /* scalar_store_cost. */
1718 1, /* vec_stmt_cost. */
1719 1, /* vec_to_scalar_cost. */
1720 1, /* scalar_to_vec_cost. */
1721 1, /* vec_align_load_cost. */
1722 2, /* vec_unalign_load_cost. */
1723 1, /* vec_store_cost. */
1724 3, /* cond_taken_branch_cost. */
1725 1, /* cond_not_taken_branch_cost. */
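/* Note on the brace-initialized tables near the end of each cost
   structure above (an explanatory sketch, not from the original
   source): they are memcpy/memset strategy tables (struct
   stringop_algs).  Each {max, alg} pair selects ALG for block sizes
   up to MAX bytes; max == -1 terminates the list and means "no upper
   bound".  Assuming the size[]/max/alg field names from i386.h, a
   minimal lookup would be: */
#if 0
static enum stringop_alg
sketch_choose_stringop_alg (const struct stringop_algs *algs,
			    unsigned HOST_WIDE_INT count)
{
  unsigned int i;

  /* Scan the {max, alg} pairs in order; the first entry whose bound
     covers COUNT wins.  */
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1
	|| count <= (unsigned HOST_WIDE_INT) algs->size[i].max)
      return algs->size[i].alg;

  /* Fall back to a library call if nothing matched.  */
  return libcall;
}
#endif
/* The real selection logic, including -minline-all-stringops
   handling, lives in decide_alg later in this file; the sketch above
   only illustrates how the tables are read.  */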
1728 /* Generic64 should produce code tuned for Nocona and K8. */
1730 struct processor_costs generic64_cost = {
1731 COSTS_N_INSNS (1), /* cost of an add instruction */
1732 /* On all chips taken into consideration, lea is 2 cycles or more. With
1733 this cost, however, our current implementation of synth_mult results in
1734 the use of unnecessary temporary registers, causing regressions on several
1735 SPECfp benchmarks. */
1736 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1737 COSTS_N_INSNS (1), /* variable shift costs */
1738 COSTS_N_INSNS (1), /* constant shift costs */
1739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1740 COSTS_N_INSNS (4), /* HI */
1741 COSTS_N_INSNS (3), /* SI */
1742 COSTS_N_INSNS (4), /* DI */
1743 COSTS_N_INSNS (2)}, /* other */
1744 0, /* cost of multiply per each bit set */
1745 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1746 COSTS_N_INSNS (26), /* HI */
1747 COSTS_N_INSNS (42), /* SI */
1748 COSTS_N_INSNS (74), /* DI */
1749 COSTS_N_INSNS (74)}, /* other */
1750 COSTS_N_INSNS (1), /* cost of movsx */
1751 COSTS_N_INSNS (1), /* cost of movzx */
1752 8, /* "large" insn */
1753 17, /* MOVE_RATIO */
1754 4, /* cost for loading QImode using movzbl */
1755 {4, 4, 4}, /* cost of loading integer registers
1756 in QImode, HImode and SImode.
1757 Relative to reg-reg move (2). */
1758 {4, 4, 4}, /* cost of storing integer registers */
1759 4, /* cost of reg,reg fld/fst */
1760 {12, 12, 12}, /* cost of loading fp registers
1761 in SFmode, DFmode and XFmode */
1762 {6, 6, 8}, /* cost of storing fp registers
1763 in SFmode, DFmode and XFmode */
1764 2, /* cost of moving MMX register */
1765 {8, 8}, /* cost of loading MMX registers
1766 in SImode and DImode */
1767 {8, 8}, /* cost of storing MMX registers
1768 in SImode and DImode */
1769 2, /* cost of moving SSE register */
1770 {8, 8, 8}, /* cost of loading SSE registers
1771 in SImode, DImode and TImode */
1772 {8, 8, 8}, /* cost of storing SSE registers
1773 in SImode, DImode and TImode */
1774 5, /* MMX or SSE register to integer */
1775 32, /* size of l1 cache. */
1776 512, /* size of l2 cache. */
1777 64, /* size of prefetch block */
1778 6, /* number of parallel prefetches */
1779 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1780 value is increased to the perhaps more appropriate value of 5. */
1781 3, /* Branch cost */
1782 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1783 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1784 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1785 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1786 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1787 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1788 {DUMMY_STRINGOP_ALGS,
1789 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1790 {DUMMY_STRINGOP_ALGS,
1791 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1792 1, /* scalar_stmt_cost. */
1793 1, /* scalar load_cost. */
1794 1, /* scalar_store_cost. */
1795 1, /* vec_stmt_cost. */
1796 1, /* vec_to_scalar_cost. */
1797 1, /* scalar_to_vec_cost. */
1798 1, /* vec_align_load_cost. */
1799 2, /* vec_unalign_load_cost. */
1800 1, /* vec_store_cost. */
1801 3, /* cond_taken_branch_cost. */
1802 1, /* cond_not_taken_branch_cost. */
1805 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1806 Athlon and K8. */
1808 struct processor_costs generic32_cost = {
1809 COSTS_N_INSNS (1), /* cost of an add instruction */
1810 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1811 COSTS_N_INSNS (1), /* variable shift costs */
1812 COSTS_N_INSNS (1), /* constant shift costs */
1813 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1814 COSTS_N_INSNS (4), /* HI */
1815 COSTS_N_INSNS (3), /* SI */
1816 COSTS_N_INSNS (4), /* DI */
1817 COSTS_N_INSNS (2)}, /* other */
1818 0, /* cost of multiply per each bit set */
1819 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1820 COSTS_N_INSNS (26), /* HI */
1821 COSTS_N_INSNS (42), /* SI */
1822 COSTS_N_INSNS (74), /* DI */
1823 COSTS_N_INSNS (74)}, /* other */
1824 COSTS_N_INSNS (1), /* cost of movsx */
1825 COSTS_N_INSNS (1), /* cost of movzx */
1826 8, /* "large" insn */
1827 17, /* MOVE_RATIO */
1828 4, /* cost for loading QImode using movzbl */
1829 {4, 4, 4}, /* cost of loading integer registers
1830 in QImode, HImode and SImode.
1831 Relative to reg-reg move (2). */
1832 {4, 4, 4}, /* cost of storing integer registers */
1833 4, /* cost of reg,reg fld/fst */
1834 {12, 12, 12}, /* cost of loading fp registers
1835 in SFmode, DFmode and XFmode */
1836 {6, 6, 8}, /* cost of storing fp registers
1837 in SFmode, DFmode and XFmode */
1838 2, /* cost of moving MMX register */
1839 {8, 8}, /* cost of loading MMX registers
1840 in SImode and DImode */
1841 {8, 8}, /* cost of storing MMX registers
1842 in SImode and DImode */
1843 2, /* cost of moving SSE register */
1844 {8, 8, 8}, /* cost of loading SSE registers
1845 in SImode, DImode and TImode */
1846 {8, 8, 8}, /* cost of storing SSE registers
1847 in SImode, DImode and TImode */
1848 5, /* MMX or SSE register to integer */
1849 32, /* size of l1 cache. */
1850 256, /* size of l2 cache. */
1851 64, /* size of prefetch block */
1852 6, /* number of parallel prefetches */
1853 3, /* Branch cost */
1854 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1855 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1856 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1857 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1858 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1859 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1860 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1861 DUMMY_STRINGOP_ALGS},
1862 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1863 DUMMY_STRINGOP_ALGS},
1864 1, /* scalar_stmt_cost. */
1865 1, /* scalar load_cost. */
1866 1, /* scalar_store_cost. */
1867 1, /* vec_stmt_cost. */
1868 1, /* vec_to_scalar_cost. */
1869 1, /* scalar_to_vec_cost. */
1870 1, /* vec_align_load_cost. */
1871 2, /* vec_unalign_load_cost. */
1872 1, /* vec_store_cost. */
1873 3, /* cond_taken_branch_cost. */
1874 1, /* cond_not_taken_branch_cost. */
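/* The trailing scalar_stmt_cost .. cond_not_taken_branch_cost fields
   of these tables feed the vectorizer cost model: e.g. on generic32 a
   misaligned vector load costs 2 units (vec_unalign_load_cost) versus
   1 for an aligned one (vec_align_load_cost), which biases the
   vectorizer toward peeling for alignment.  A minimal sketch of the
   lookup, assuming it mirrors the target's vectorization-cost hook
   defined later in this file: */
#if 0
static int
sketch_vector_load_cost (bool aligned)
{
  /* ix86_cost points at the active processor_costs table.  */
  return aligned ? ix86_cost->vec_align_load_cost
		 : ix86_cost->vec_unalign_load_cost;
}
#endif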
1877 const struct processor_costs *ix86_cost = &pentium_cost;
1879 /* Processor feature/optimization bitmasks. */
1880 #define m_386 (1<<PROCESSOR_I386)
1881 #define m_486 (1<<PROCESSOR_I486)
1882 #define m_PENT (1<<PROCESSOR_PENTIUM)
1883 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1884 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1885 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1886 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1887 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1888 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1889 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1890 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1891 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1892 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1893 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1894 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1895 #define m_ATOM (1<<PROCESSOR_ATOM)
1897 #define m_GEODE (1<<PROCESSOR_GEODE)
1898 #define m_K6 (1<<PROCESSOR_K6)
1899 #define m_K6_GEODE (m_K6 | m_GEODE)
1900 #define m_K8 (1<<PROCESSOR_K8)
1901 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1902 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1903 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1904 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1905 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1906 #define m_BDVER (m_BDVER1 | m_BDVER2)
1907 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1908 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1910 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1911 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1913 /* Generic instruction choice should be a common subset of supported CPUs
1914 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1915 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
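/* Illustrative expansion of the bitmask scheme above (not from the
   original source): an initializer entry such as

     m_PENT | m_K6_GEODE

   denotes the processor bit set

     (1<<PROCESSOR_PENTIUM) | (1<<PROCESSOR_K6) | (1<<PROCESSOR_GEODE)

   and a tuning flag is on for the CPU chosen by -mtune exactly when
   its bit is present in the entry, i.e. !!(entry & (1u << ix86_tune)),
   which is what the loop over initial_ix86_tune_features below
   computes.  */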
1917 /* Feature tests against the various tunings. */
1918 unsigned char ix86_tune_features[X86_TUNE_LAST];
1920 /* Feature tests against the various tunings used to create ix86_tune_features
1921 based on the processor mask. */
1922 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1923 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1924 negatively, so enabling it for Generic64 seems like a good code size
1925 tradeoff. We can't enable it for 32bit generic because it does not
1926 work well with PPro based chips. */
1927 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1929 /* X86_TUNE_PUSH_MEMORY */
1930 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1932 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1935 /* X86_TUNE_UNROLL_STRLEN */
1936 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1938 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1939 on simulation results. But after P4 was made, no performance benefit
1940 was observed with branch hints. They also increase the code size.
1941 As a result, icc never generates branch hints. */
1944 /* X86_TUNE_DOUBLE_WITH_ADD */
1947 /* X86_TUNE_USE_SAHF */
1948 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1950 /* X86_TUNE_MOVX: Enable zero-extending of integer registers to avoid
1951 partial dependencies. */
1952 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1954 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1955 register stalls on the Generic32 compilation setting as well. However,
1956 in the current implementation the partial register stalls are not
1957 eliminated very well - they can be introduced via subregs synthesized
1958 by combine and can happen in caller/callee saving sequences. Because
1959 this option pays back little on PPro based chips and conflicts with the
1960 partial register dependencies used by Athlon/P4 based chips, it is
1961 better to leave it off for generic32 for now. */
1964 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1965 m_CORE2I7 | m_GENERIC,
1967 /* X86_TUNE_USE_HIMODE_FIOP */
1968 m_386 | m_486 | m_K6_GEODE,
1970 /* X86_TUNE_USE_SIMODE_FIOP */
1971 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1973 /* X86_TUNE_USE_MOV0 */
1976 /* X86_TUNE_USE_CLTD */
1977 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1979 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1982 /* X86_TUNE_SPLIT_LONG_MOVES */
1985 /* X86_TUNE_READ_MODIFY_WRITE */
1988 /* X86_TUNE_READ_MODIFY */
1991 /* X86_TUNE_PROMOTE_QIMODE */
1992 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1994 /* X86_TUNE_FAST_PREFIX */
1995 ~(m_386 | m_486 | m_PENT),
1997 /* X86_TUNE_SINGLE_STRINGOP */
1998 m_386 | m_P4_NOCONA,
2000 /* X86_TUNE_QIMODE_MATH */
2003 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2004 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
2005 might be considered for Generic32 if our scheme for avoiding partial
2006 stalls were more effective. */
2009 /* X86_TUNE_PROMOTE_QI_REGS */
2012 /* X86_TUNE_PROMOTE_HI_REGS */
2015 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2016 over esp addition. */
2017 m_386 | m_486 | m_PENT | m_PPRO,
2019 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2020 over esp addition. */
2023 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2024 over esp subtraction. */
2025 m_386 | m_486 | m_PENT | m_K6_GEODE,
2027 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
2028 over esp subtraction. */
2029 m_PENT | m_K6_GEODE,
2031 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2032 for DFmode copies */
2033 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2035 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2036 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2038 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2039 conflict here between PPro/Pentium4 based chips that treat 128bit
2040 SSE registers as single units and K8 based chips that divide SSE
2041 registers into two 64bit halves. This knob promotes all store
2042 destinations to be 128bit to allow register renaming on 128bit SSE
2043 units, but usually results in one extra microop on 64bit SSE units.
2044 Experimental results show that disabling this option on P4 brings over
2045 20% SPECfp regression, while enabling it on K8 brings roughly 2.4%
2046 regression that can be partly masked by careful scheduling of moves. */
2047 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2049 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2050 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2052 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2055 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2058 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2059 are resolved on SSE register parts instead of whole registers, so we may
2060 maintain just the lower part of scalar values in the proper format, leaving
2061 the upper part undefined. */
2064 /* X86_TUNE_SSE_TYPELESS_STORES */
2067 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2068 m_PPRO | m_P4_NOCONA,
2070 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2071 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2073 /* X86_TUNE_PROLOGUE_USING_MOVE */
2074 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2076 /* X86_TUNE_EPILOGUE_USING_MOVE */
2077 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2079 /* X86_TUNE_SHIFT1 */
2082 /* X86_TUNE_USE_FFREEP */
2085 /* X86_TUNE_INTER_UNIT_MOVES */
2086 ~(m_AMD_MULTIPLE | m_GENERIC),
2088 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2089 ~(m_AMDFAM10 | m_BDVER),
2091 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2092 than 4 branch instructions in the 16 byte window. */
2093 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2095 /* X86_TUNE_SCHEDULE */
2096 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2098 /* X86_TUNE_USE_BT */
2099 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2101 /* X86_TUNE_USE_INCDEC */
2102 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2104 /* X86_TUNE_PAD_RETURNS */
2105 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2107 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2110 /* X86_TUNE_EXT_80387_CONSTANTS */
2111 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2113 /* X86_TUNE_SHORTEN_X87_SSE */
2116 /* X86_TUNE_AVOID_VECTOR_DECODE */
2117 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2119 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2120 and SImode multiply, but 386 and 486 do HImode multiply faster. */
2123 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2124 vector path on AMD machines. */
2125 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2127 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2128 machines. */
2129 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2131 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2132 than a MOV. */
2135 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2136 but one byte longer. */
2139 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2140 operand that cannot be represented using a modRM byte. The XOR
2141 replacement is long decoded, so this split helps here as well. */
2144 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2145 from FP to FP. */
2146 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2148 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2149 from integer to FP. */
2152 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2153 with a subsequent conditional jump instruction into a single
2154 compare-and-branch uop. */
2157 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2158 will impact LEA instruction selection. */
2161 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2162 instructions. */
2165 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2166 at -O3. For the moment, the prefetching seems badly tuned for Intel
2167 chips. */
2168 m_K6_GEODE | m_AMD_MULTIPLE,
2170 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2171 the auto-vectorizer. */
2174 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2175 during reassociation of integer computation. */
2178 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2179 during reassociation of fp computation. */
2183 /* Feature tests against the various architecture variations. */
2184 unsigned char ix86_arch_features[X86_ARCH_LAST];
2186 /* Feature tests against the various architecture variations, used to create
2187 ix86_arch_features based on the processor mask. */
2188 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2189 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2190 ~(m_386 | m_486 | m_PENT | m_K6),
2192 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2195 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2198 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2201 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2205 static const unsigned int x86_accumulate_outgoing_args
2206 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2208 static const unsigned int x86_arch_always_fancy_math_387
2209 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2211 static const unsigned int x86_avx256_split_unaligned_load
2212 = m_COREI7 | m_GENERIC;
2214 static const unsigned int x86_avx256_split_unaligned_store
2215 = m_COREI7 | m_BDVER | m_GENERIC;
2217 /* In case the average insn count for single function invocation is
2218 lower than this constant, emit fast (but longer) prologue and
2219 epilogue code. */
2220 #define FAST_PROLOGUE_INSN_COUNT 20
2222 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2223 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2224 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2225 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2227 /* Array of the smallest class containing reg number REGNO, indexed by
2228 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2230 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2232 /* ax, dx, cx, bx */
2233 AREG, DREG, CREG, BREG,
2234 /* si, di, bp, sp */
2235 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2237 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2238 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2241 /* flags, fpsr, fpcr, frame */
2242 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2244 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2247 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2250 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2251 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2252 /* SSE REX registers */
2253 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2257 /* The "default" register map used in 32bit mode. */
2259 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2261 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2262 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2263 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2264 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2265 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2270 /* The "default" register map used in 64bit mode. */
2272 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2274 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2275 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2276 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2277 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2278 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2279 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2280 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2283 /* Define the register numbers to be used in Dwarf debugging information.
2284 The SVR4 reference port C compiler uses the following register numbers
2285 in its Dwarf output code:
2286 0 for %eax (gcc regno = 0)
2287 1 for %ecx (gcc regno = 2)
2288 2 for %edx (gcc regno = 1)
2289 3 for %ebx (gcc regno = 3)
2290 4 for %esp (gcc regno = 7)
2291 5 for %ebp (gcc regno = 6)
2292 6 for %esi (gcc regno = 4)
2293 7 for %edi (gcc regno = 5)
2294 The following three DWARF register numbers are never generated by
2295 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2296 believes these numbers have these meanings.
2297 8 for %eip (no gcc equivalent)
2298 9 for %eflags (gcc regno = 17)
2299 10 for %trapno (no gcc equivalent)
2300 It is not at all clear how we should number the FP stack registers
2301 for the x86 architecture. If the version of SDB on x86/svr4 were
2302 a bit less brain dead with respect to floating-point then we would
2303 have a precedent to follow with respect to DWARF register numbers
2304 for x86 FP registers, but the SDB on x86/svr4 is so completely
2305 broken with respect to FP registers that it is hardly worth thinking
2306 of it as something to strive for compatibility with.
2307 The version of x86/svr4 SDB I have at the moment does (partially)
2308 seem to believe that DWARF register number 11 is associated with
2309 the x86 register %st(0), but that's about all. Higher DWARF
2310 register numbers don't seem to be associated with anything in
2311 particular, and even for DWARF regno 11, SDB only seems to under-
2312 stand that it should say that a variable lives in %st(0) (when
2313 asked via an `=' command) if we said it was in DWARF regno 11,
2314 but SDB still prints garbage when asked for the value of the
2315 variable in question (via a `/' command).
2316 (Also note that the labels SDB prints for various FP stack regs
2317 when doing an `x' command are all wrong.)
2318 Note that these problems generally don't affect the native SVR4
2319 C compiler because it doesn't allow the use of -O with -g and
2320 because when it is *not* optimizing, it allocates a memory
2321 location for each floating-point variable, and the memory
2322 location is what gets described in the DWARF AT_location
2323 attribute for the variable in question.
2324 Regardless of the severe mental illness of the x86/svr4 SDB, we
2325 do something sensible here and we use the following DWARF
2326 register numbers. Note that these are all stack-top-relative
2327 numbers:
2328 11 for %st(0) (gcc regno = 8)
2329 12 for %st(1) (gcc regno = 9)
2330 13 for %st(2) (gcc regno = 10)
2331 14 for %st(3) (gcc regno = 11)
2332 15 for %st(4) (gcc regno = 12)
2333 16 for %st(5) (gcc regno = 13)
2334 17 for %st(6) (gcc regno = 14)
2335 18 for %st(7) (gcc regno = 15)
2337 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2339 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2340 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2341 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2342 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2343 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2344 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2345 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2348 /* Define parameter passing and return registers. */
2350 static int const x86_64_int_parameter_registers[6] =
2352 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2355 static int const x86_64_ms_abi_int_parameter_registers[4] =
2357 CX_REG, DX_REG, R8_REG, R9_REG
2360 static int const x86_64_int_return_registers[4] =
2362 AX_REG, DX_REG, DI_REG, SI_REG
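/* Worked example for the tables above (illustrative, not from the
   original source): under the SysV x86-64 ABI a call f (a, b, c) with
   three integer arguments passes a in %rdi (DI_REG), b in %rsi
   (SI_REG) and c in %rdx (DX_REG), and an integer result comes back
   in %rax (AX_REG).  The MS ABI variant starts at %rcx instead, as
   x86_64_ms_abi_int_parameter_registers shows.  */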
2365 /* Define the structure for the machine field in struct function. */
2367 struct GTY(()) stack_local_entry {
2368 unsigned short mode;
2369 unsigned short n;
2370 rtx rtl;
2371 struct stack_local_entry *next;
2372 };
2374 /* Structure describing stack frame layout.
2375 Stack grows downward:
2381 saved static chain if ix86_static_chain_on_stack
2383 saved frame pointer if frame_pointer_needed
2384 <- HARD_FRAME_POINTER
2390 <- sse_regs_save_offset
2393 [va_arg registers] |
2397 [padding2] | = to_allocate
2406 int outgoing_arguments_size;
2407 HOST_WIDE_INT frame;
2409 /* The offsets relative to ARG_POINTER. */
2410 HOST_WIDE_INT frame_pointer_offset;
2411 HOST_WIDE_INT hard_frame_pointer_offset;
2412 HOST_WIDE_INT stack_pointer_offset;
2413 HOST_WIDE_INT hfp_save_offset;
2414 HOST_WIDE_INT reg_save_offset;
2415 HOST_WIDE_INT sse_reg_save_offset;
2417 /* When save_regs_using_mov is set, emit prologue using
2418 move instead of push instructions. */
2419 bool save_regs_using_mov;
2422 /* Which CPU we are scheduling for. */
2423 enum attr_cpu ix86_schedule;
2425 /* Which CPU we are optimizing for. */
2426 enum processor_type ix86_tune;
2428 /* Which instruction set architecture to use. */
2429 enum processor_type ix86_arch;
2431 /* True if processor has SSE prefetch instruction. */
2432 int x86_prefetch_sse;
2434 /* True if processor has prefetchw instruction. */
2437 /* -mstackrealign option */
2438 static const char ix86_force_align_arg_pointer_string[]
2439 = "force_align_arg_pointer";
2441 static rtx (*ix86_gen_leave) (void);
2442 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2443 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2444 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2445 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2446 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2447 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2448 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2449 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2450 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2452 /* Preferred alignment for stack boundary in bits. */
2453 unsigned int ix86_preferred_stack_boundary;
2455 /* Alignment for incoming stack boundary in bits specified at
2456 command line. */
2457 static unsigned int ix86_user_incoming_stack_boundary;
2459 /* Default alignment for incoming stack boundary in bits. */
2460 static unsigned int ix86_default_incoming_stack_boundary;
2462 /* Alignment for incoming stack boundary in bits. */
2463 unsigned int ix86_incoming_stack_boundary;
2465 /* Calling ABI specific va_list type nodes. */
2466 static GTY(()) tree sysv_va_list_type_node;
2467 static GTY(()) tree ms_va_list_type_node;
2469 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2470 char internal_label_prefix[16];
2471 int internal_label_prefix_len;
2473 /* Fence to use after loop using movnt. */
2476 /* Register class used for passing given 64bit part of the argument.
2477 These represent classes as documented by the PS ABI, with the exception
2478 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2479 uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
2481 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2482 whenever possible (the upper half does contain padding). */
2483 enum x86_64_reg_class
2484 {
2485 X86_64_NO_CLASS,
2486 X86_64_INTEGER_CLASS,
2487 X86_64_INTEGERSI_CLASS,
2488 X86_64_SSE_CLASS,
2489 X86_64_SSESF_CLASS,
2490 X86_64_SSEDF_CLASS,
2491 X86_64_SSEUP_CLASS,
2492 X86_64_X87_CLASS,
2493 X86_64_X87UP_CLASS,
2494 X86_64_COMPLEX_X87_CLASS,
2495 X86_64_MEMORY_CLASS
2496 };
2498 #define MAX_CLASSES 4
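/* Worked classification example (illustrative, not from the original
   source): a structure such as

     struct { double d; int i; }

   occupies two eightbytes; the first classifies as X86_64_SSEDF_CLASS
   (the double) and the second as X86_64_INTEGERSI_CLASS (the int plus
   padding), so the aggregate is passed in one SSE register and one
   general register.  */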
2500 /* Table of constants used by fldpi, fldln2, etc.... */
2501 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2502 static bool ext_80387_constants_init = 0;
2505 static struct machine_function * ix86_init_machine_status (void);
2506 static rtx ix86_function_value (const_tree, const_tree, bool);
2507 static bool ix86_function_value_regno_p (const unsigned int);
2508 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2510 static rtx ix86_static_chain (const_tree, bool);
2511 static int ix86_function_regparm (const_tree, const_tree);
2512 static void ix86_compute_frame_layout (struct ix86_frame *);
2513 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2515 static void ix86_add_new_builtins (HOST_WIDE_INT);
2516 static tree ix86_canonical_va_list_type (tree);
2517 static void predict_jump (int);
2518 static unsigned int split_stack_prologue_scratch_regno (void);
2519 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2521 enum ix86_function_specific_strings
2522 {
2523 IX86_FUNCTION_SPECIFIC_ARCH,
2524 IX86_FUNCTION_SPECIFIC_TUNE,
2525 IX86_FUNCTION_SPECIFIC_MAX
2526 };
2528 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2529 const char *, enum fpmath_unit, bool);
2530 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2531 static void ix86_function_specific_save (struct cl_target_option *);
2532 static void ix86_function_specific_restore (struct cl_target_option *);
2533 static void ix86_function_specific_print (FILE *, int,
2534 struct cl_target_option *);
2535 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2536 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2537 struct gcc_options *);
2538 static bool ix86_can_inline_p (tree, tree);
2539 static void ix86_set_current_function (tree);
2540 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2542 static enum calling_abi ix86_function_abi (const_tree);
2545 #ifndef SUBTARGET32_DEFAULT_CPU
2546 #define SUBTARGET32_DEFAULT_CPU "i386"
2549 /* The svr4 ABI for the i386 says that records and unions are returned
2550 in memory. */
2551 #ifndef DEFAULT_PCC_STRUCT_RETURN
2552 #define DEFAULT_PCC_STRUCT_RETURN 1
2555 /* Whether -mtune= or -march= were specified */
2556 static int ix86_tune_defaulted;
2557 static int ix86_arch_specified;
2559 /* Vectorization library interface and handlers. */
2560 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2562 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2563 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2565 /* Processor target table, indexed by processor number */
2566 struct ptt
2567 {
2568 const struct processor_costs *cost; /* Processor costs */
2569 const int align_loop; /* Default alignments. */
2570 const int align_loop_max_skip;
2571 const int align_jump;
2572 const int align_jump_max_skip;
2573 const int align_func;
2574 };
2576 static const struct ptt processor_target_table[PROCESSOR_max] =
2578 {&i386_cost, 4, 3, 4, 3, 4},
2579 {&i486_cost, 16, 15, 16, 15, 16},
2580 {&pentium_cost, 16, 7, 16, 7, 16},
2581 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2582 {&geode_cost, 0, 0, 0, 0, 0},
2583 {&k6_cost, 32, 7, 32, 7, 32},
2584 {&athlon_cost, 16, 7, 16, 7, 16},
2585 {&pentium4_cost, 0, 0, 0, 0, 0},
2586 {&k8_cost, 16, 7, 16, 7, 16},
2587 {&nocona_cost, 0, 0, 0, 0, 0},
2588 /* Core 2 32-bit. */
2589 {&generic32_cost, 16, 10, 16, 10, 16},
2590 /* Core 2 64-bit. */
2591 {&generic64_cost, 16, 10, 16, 10, 16},
2592 /* Core i7 32-bit. */
2593 {&generic32_cost, 16, 10, 16, 10, 16},
2594 /* Core i7 64-bit. */
2595 {&generic64_cost, 16, 10, 16, 10, 16},
2596 {&generic32_cost, 16, 7, 16, 7, 16},
2597 {&generic64_cost, 16, 10, 16, 10, 16},
2598 {&amdfam10_cost, 32, 24, 32, 7, 32},
2599 {&bdver1_cost, 32, 24, 32, 7, 32},
2600 {&bdver2_cost, 32, 24, 32, 7, 32},
2601 {&btver1_cost, 32, 24, 32, 7, 32},
2602 {&atom_cost, 16, 15, 16, 7, 16}
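/* Usage sketch for the table above (illustrative): it is indexed by
   the PROCESSOR_* value chosen for -mtune, so e.g. -mtune=atom picks
   the last row, making ix86_cost point at atom_cost and supplying the
   16/15/16/7/16 alignment defaults roughly as:

     ix86_cost = processor_target_table[ix86_tune].cost;
     align_loops = processor_target_table[ix86_tune].align_loop;

   (the real assignments appear later in
   ix86_option_override_internal).  */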
2605 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2635 /* Return true if a red-zone is in use. */
2638 ix86_using_red_zone (void)
2640 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
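/* Background note (not from the original source): the SysV x86-64 ABI
   guarantees a 128-byte "red zone" below %rsp that signal and
   interrupt handlers never clobber, so leaf functions may use it
   without adjusting the stack pointer.  The Windows x64 ABI makes no
   such guarantee, hence the TARGET_64BIT_MS_ABI exclusion above.  */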
2643 /* Return a string that documents the current -m options. The caller is
2644 responsible for freeing the string. */
2647 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2648 const char *tune, enum fpmath_unit fpmath,
2651 struct ix86_target_opts
2652 {
2653 const char *option; /* option string */
2654 HOST_WIDE_INT mask; /* isa mask options */
2655 };
2657 /* This table is ordered so that options like -msse4.2, which imply
2658 preceding options, are matched first. */
2659 static struct ix86_target_opts isa_opts[] =
2661 { "-m64", OPTION_MASK_ISA_64BIT },
2662 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2663 { "-mfma", OPTION_MASK_ISA_FMA },
2664 { "-mxop", OPTION_MASK_ISA_XOP },
2665 { "-mlwp", OPTION_MASK_ISA_LWP },
2666 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2667 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2668 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2669 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2670 { "-msse3", OPTION_MASK_ISA_SSE3 },
2671 { "-msse2", OPTION_MASK_ISA_SSE2 },
2672 { "-msse", OPTION_MASK_ISA_SSE },
2673 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2674 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2675 { "-mmmx", OPTION_MASK_ISA_MMX },
2676 { "-mabm", OPTION_MASK_ISA_ABM },
2677 { "-mbmi", OPTION_MASK_ISA_BMI },
2678 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2679 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2680 { "-mtbm", OPTION_MASK_ISA_TBM },
2681 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2682 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2683 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2684 { "-maes", OPTION_MASK_ISA_AES },
2685 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2686 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2687 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2688 { "-mf16c", OPTION_MASK_ISA_F16C },
2692 static struct ix86_target_opts flag_opts[] =
2694 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2695 { "-m80387", MASK_80387 },
2696 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2697 { "-malign-double", MASK_ALIGN_DOUBLE },
2698 { "-mcld", MASK_CLD },
2699 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2700 { "-mieee-fp", MASK_IEEE_FP },
2701 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2702 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2703 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2704 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2705 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2706 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2707 { "-mno-red-zone", MASK_NO_RED_ZONE },
2708 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2709 { "-mrecip", MASK_RECIP },
2710 { "-mrtd", MASK_RTD },
2711 { "-msseregparm", MASK_SSEREGPARM },
2712 { "-mstack-arg-probe", MASK_STACK_PROBE },
2713 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2714 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2715 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2716 { "-mvzeroupper", MASK_VZEROUPPER },
2717 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2718 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2719 { "-mprefer-avx128", MASK_PREFER_AVX128},
2722 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2725 char target_other[40];
2734 memset (opts, '\0', sizeof (opts));
2736 /* Add -march= option. */
2739 opts[num][0] = "-march=";
2740 opts[num++][1] = arch;
2743 /* Add -mtune= option. */
2746 opts[num][0] = "-mtune=";
2747 opts[num++][1] = tune;
2750 /* Pick out the ISA options. */
2751 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2753 if ((isa & isa_opts[i].mask) != 0)
2755 opts[num++][0] = isa_opts[i].option;
2756 isa &= ~ isa_opts[i].mask;
2760 if (isa && add_nl_p)
2762 opts[num++][0] = isa_other;
2763 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2767 /* Add flag options. */
2768 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2770 if ((flags & flag_opts[i].mask) != 0)
2772 opts[num++][0] = flag_opts[i].option;
2773 flags &= ~ flag_opts[i].mask;
2777 if (flags && add_nl_p)
2779 opts[num++][0] = target_other;
2780 sprintf (target_other, "(other flags: %#x)", flags);
2783 /* Add -fpmath= option. */
2786 opts[num][0] = "-mfpmath=";
2787 switch ((int) fpmath)
2790 opts[num++][1] = "387";
2794 opts[num++][1] = "sse";
2797 case FPMATH_387 | FPMATH_SSE:
2798 opts[num++][1] = "sse+387";
2810 gcc_assert (num < ARRAY_SIZE (opts));
2812 /* Size the string. */
2814 sep_len = (add_nl_p) ? 3 : 1;
2815 for (i = 0; i < num; i++)
2818 for (j = 0; j < 2; j++)
2820 len += strlen (opts[i][j]);
2823 /* Build the string. */
2824 ret = ptr = (char *) xmalloc (len);
2827 for (i = 0; i < num; i++)
2831 for (j = 0; j < 2; j++)
2832 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2839 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2847 for (j = 0; j < 2; j++)
2850 memcpy (ptr, opts[i][j], len2[j]);
2852 line_len += len2[j];
2857 gcc_assert (ret + len >= ptr);
2862 /* Return true if profiling code should be emitted before the
2863 prologue, false otherwise.
2864 Note: for x86 with "hotfix", sorry () is issued. */
2866 ix86_profile_before_prologue (void)
2868 return flag_fentry != 0;
2871 /* Function that is callable from the debugger to print the current
2872 options. */
2874 ix86_debug_options (void)
2876 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2877 ix86_arch_string, ix86_tune_string,
2882 fprintf (stderr, "%s\n\n", opts);
2886 fputs ("<no options>\n\n", stderr);
2891 /* Override various settings based on options. If MAIN_ARGS_P, the
2892 options are from the command line, otherwise they are from
2896 ix86_option_override_internal (bool main_args_p)
2899 unsigned int ix86_arch_mask, ix86_tune_mask;
2900 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2905 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2906 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2907 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2908 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2909 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2910 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2911 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2912 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2913 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2914 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2915 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2916 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2917 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2918 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2919 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2920 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2921 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2922 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2923 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2924 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2925 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2926 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2927 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2928 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2929 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2930 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2931 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2932 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2933 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2934 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2935 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2936 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2937 #define PTA_PREFETCHW (HOST_WIDE_INT_1 << 32)
2939 /* If this reaches 64, we need to widen the struct pta flags field below. */
2941 static struct pta
2942 {
2943 const char *const name; /* processor name or nickname. */
2944 const enum processor_type processor;
2945 const enum attr_cpu schedule;
2946 const unsigned HOST_WIDE_INT flags;
2947 }
2948 const processor_alias_table[] =
2950 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2951 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2952 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2953 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2954 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2955 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2956 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2957 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2958 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2959 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2960 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2961 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2962 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2964 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2966 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2967 PTA_MMX | PTA_SSE | PTA_SSE2},
2968 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2969 PTA_MMX | PTA_SSE | PTA_SSE2},
2970 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2971 PTA_MMX | PTA_SSE | PTA_SSE2},
2972 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2973 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2974 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2975 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2976 | PTA_CX16 | PTA_NO_SAHF},
2977 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2978 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2979 | PTA_SSSE3 | PTA_CX16},
2980 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2981 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2982 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2983 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2984 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2985 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2986 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2987 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
2988 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2989 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2990 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2991 | PTA_RDRND | PTA_F16C},
2992 {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
2993 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2994 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
2995 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2996 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
2997 | PTA_FMA | PTA_MOVBE},
2998 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2999 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3000 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3001 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3002 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3003 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3004 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3005 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3006 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3007 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3008 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3009 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3010 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3011 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3012 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3013 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3014 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3015 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3016 {"x86-64", PROCESSOR_K8, CPU_K8,
3017 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3018 {"k8", PROCESSOR_K8, CPU_K8,
3019 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3020 | PTA_SSE2 | PTA_NO_SAHF},
3021 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3022 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3023 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3024 {"opteron", PROCESSOR_K8, CPU_K8,
3025 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3026 | PTA_SSE2 | PTA_NO_SAHF},
3027 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3028 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3029 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3030 {"athlon64", PROCESSOR_K8, CPU_K8,
3031 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3032 | PTA_SSE2 | PTA_NO_SAHF},
3033 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3034 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3035 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3036 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3037 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3038 | PTA_SSE2 | PTA_NO_SAHF},
3039 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3040 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3041 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3042 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3043 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3044 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3045 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3046 PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
3047 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
3048 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3049 | PTA_FMA4 | PTA_XOP | PTA_LWP},
3050 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3051 PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
3052 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
3053 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3054 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3056 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3057 PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
3058 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3059 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3060 0 /* flags are only used for -march switch. */ },
3061 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3062 PTA_64BIT /* flags are only used for -march switch. */ },
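/* Worked example for the alias table above (illustrative): with
   -march=corei7 the lookup loop below sets
   ix86_arch = PROCESSOR_COREI7_64 and ix86_schedule = CPU_COREI7,
   then ORs the entry's PTA_* flags (MMX, SSE..SSE4.2, CX16) into
   ix86_isa_flags, skipping any ISA the user explicitly enabled or
   disabled on the command line (tracked in ix86_isa_flags_explicit).  */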
3065 /* -mrecip options. */
3066 static struct
3067 {
3068 const char *string; /* option name */
3069 unsigned int mask; /* mask bits to set */
3070 }
3071 const recip_options[] =
3073 { "all", RECIP_MASK_ALL },
3074 { "none", RECIP_MASK_NONE },
3075 { "div", RECIP_MASK_DIV },
3076 { "sqrt", RECIP_MASK_SQRT },
3077 { "vec-div", RECIP_MASK_VEC_DIV },
3078 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3081 int const pta_size = ARRAY_SIZE (processor_alias_table);
3083 /* Set up prefix/suffix so the error messages refer to either the command
3084 line argument, or the attribute(target). */
3093 prefix = "option(\"";
3098 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3099 SUBTARGET_OVERRIDE_OPTIONS;
3102 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3103 SUBSUBTARGET_OVERRIDE_OPTIONS;
3107 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3109 /* -fPIC is the default for x86_64. */
3110 if (TARGET_MACHO && TARGET_64BIT)
3113 /* Need to check -mtune=generic first. */
3114 if (ix86_tune_string)
3116 if (!strcmp (ix86_tune_string, "generic")
3117 || !strcmp (ix86_tune_string, "i686")
3118 /* As special support for cross compilers we read -mtune=native
3119 as -mtune=generic. With native compilers we won't see the
3120 -mtune=native, as it was changed by the driver. */
3121 || !strcmp (ix86_tune_string, "native"))
3124 ix86_tune_string = "generic64";
3126 ix86_tune_string = "generic32";
3128 /* If this call is for setting the option attribute, allow the
3129 generic32/generic64 that was previously set. */
3130 else if (!main_args_p
3131 && (!strcmp (ix86_tune_string, "generic32")
3132 || !strcmp (ix86_tune_string, "generic64")))
3134 else if (!strncmp (ix86_tune_string, "generic", 7))
3135 error ("bad value (%s) for %stune=%s %s",
3136 ix86_tune_string, prefix, suffix, sw);
3137 else if (!strcmp (ix86_tune_string, "x86-64"))
3138 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3139 "%stune=k8%s or %stune=generic%s instead as appropriate",
3140 prefix, suffix, prefix, suffix, prefix, suffix);
3144 if (ix86_arch_string)
3145 ix86_tune_string = ix86_arch_string;
3146 if (!ix86_tune_string)
3148 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3149 ix86_tune_defaulted = 1;
3152 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3153 need to use a sensible tune option. */
3154 if (!strcmp (ix86_tune_string, "generic")
3155 || !strcmp (ix86_tune_string, "x86-64")
3156 || !strcmp (ix86_tune_string, "i686"))
3159 ix86_tune_string = "generic64";
3161 ix86_tune_string = "generic32";
3165 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3167 /* rep; movq isn't available in 32-bit code. */
3168 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3169 ix86_stringop_alg = no_stringop;
3172 if (!ix86_arch_string)
3173 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3175 ix86_arch_specified = 1;
3177 if (!global_options_set.x_ix86_abi)
3178 ix86_abi = DEFAULT_ABI;
3180 if (global_options_set.x_ix86_cmodel)
3182 switch (ix86_cmodel)
3187 ix86_cmodel = CM_SMALL_PIC;
3189 error ("code model %qs not supported in the %s bit mode",
3196 ix86_cmodel = CM_MEDIUM_PIC;
3198 error ("code model %qs not supported in the %s bit mode",
3200 else if (TARGET_X32)
3201 error ("code model %qs not supported in x32 mode",
3208 ix86_cmodel = CM_LARGE_PIC;
3210 error ("code model %qs not supported in the %s bit mode",
3212 else if (TARGET_X32)
3213 error ("code model %qs not supported in x32 mode",
3219 error ("code model %s does not support PIC mode", "32");
3221 error ("code model %qs not supported in the %s bit mode",
3228 error ("code model %s does not support PIC mode", "kernel");
3229 ix86_cmodel = CM_32;
3232 error ("code model %qs not supported in the %s bit mode",
3242 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3243 use of rip-relative addressing. This eliminates fixups that
3244 would otherwise be needed if this object is to be placed in a
3245 DLL, and is essentially just as efficient as direct addressing. */
3246 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3247 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3248 else if (TARGET_64BIT)
3249 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3251 ix86_cmodel = CM_32;
3253 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3255 error ("-masm=intel not supported in this configuration");
3256 ix86_asm_dialect = ASM_ATT;
3258 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3259 sorry ("%i-bit mode not compiled in",
3260 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3262 for (i = 0; i < pta_size; i++)
3263 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3265 ix86_schedule = processor_alias_table[i].schedule;
3266 ix86_arch = processor_alias_table[i].processor;
3267 /* Default cpu tuning to the architecture. */
3268 ix86_tune = ix86_arch;
3270 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3271 error ("CPU you selected does not support x86-64 "
3274 if (processor_alias_table[i].flags & PTA_MMX
3275 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3276 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3277 if (processor_alias_table[i].flags & PTA_3DNOW
3278 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3279 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3280 if (processor_alias_table[i].flags & PTA_3DNOW_A
3281 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3282 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3283 if (processor_alias_table[i].flags & PTA_SSE
3284 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3285 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3286 if (processor_alias_table[i].flags & PTA_SSE2
3287 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3288 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3289 if (processor_alias_table[i].flags & PTA_SSE3
3290 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3291 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3292 if (processor_alias_table[i].flags & PTA_SSSE3
3293 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3294 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3295 if (processor_alias_table[i].flags & PTA_SSE4_1
3296 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3297 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3298 if (processor_alias_table[i].flags & PTA_SSE4_2
3299 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3300 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3301 if (processor_alias_table[i].flags & PTA_AVX
3302 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3303 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3304 if (processor_alias_table[i].flags & PTA_AVX2
3305 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3306 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3307 if (processor_alias_table[i].flags & PTA_FMA
3308 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3309 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3310 if (processor_alias_table[i].flags & PTA_SSE4A
3311 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3312 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3313 if (processor_alias_table[i].flags & PTA_FMA4
3314 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3315 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3316 if (processor_alias_table[i].flags & PTA_XOP
3317 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3318 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3319 if (processor_alias_table[i].flags & PTA_LWP
3320 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3321 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3322 if (processor_alias_table[i].flags & PTA_ABM
3323 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3324 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3325 if (processor_alias_table[i].flags & PTA_BMI
3326 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3327 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3328 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3329 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3330 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3331 if (processor_alias_table[i].flags & PTA_TBM
3332 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3333 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3334 if (processor_alias_table[i].flags & PTA_BMI2
3335 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3336 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3337 if (processor_alias_table[i].flags & PTA_CX16
3338 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3339 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3340 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3341 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3342 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3343 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3344 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3345 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3346 if (processor_alias_table[i].flags & PTA_MOVBE
3347 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3348 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3349 if (processor_alias_table[i].flags & PTA_AES
3350 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3351 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3352 if (processor_alias_table[i].flags & PTA_PCLMUL
3353 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3354 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3355 if (processor_alias_table[i].flags & PTA_FSGSBASE
3356 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3357 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3358 if (processor_alias_table[i].flags & PTA_RDRND
3359 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3360 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3361 if (processor_alias_table[i].flags & PTA_F16C
3362 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3363 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3364 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3365 x86_prefetch_sse = true;
3366 if (processor_alias_table[i].flags & PTA_PREFETCHW)
3367 x86_prefetchw = true;
3372 if (!strcmp (ix86_arch_string, "generic"))
3373 error ("generic CPU can be used only for %stune=%s %s",
3374 prefix, suffix, sw);
3375 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3376 error ("bad value (%s) for %sarch=%s %s",
3377 ix86_arch_string, prefix, suffix, sw);
3379 ix86_arch_mask = 1u << ix86_arch;
3380 for (i = 0; i < X86_ARCH_LAST; ++i)
3381 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
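/* Illustrative note: each initial_ix86_arch_features[] entry is a bitmask
   with one bit per processor, so ANDing it with (1u << ix86_arch) turns the
   table into a per-feature boolean array for the selected -march CPU.  The
   same scheme is reused for the tune features below. */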
3383 for (i = 0; i < pta_size; i++)
3384 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3386 ix86_schedule = processor_alias_table[i].schedule;
3387 ix86_tune = processor_alias_table[i].processor;
3390 if (!(processor_alias_table[i].flags & PTA_64BIT))
3392 if (ix86_tune_defaulted)
3394 ix86_tune_string = "x86-64";
3395 for (i = 0; i < pta_size; i++)
3396 if (! strcmp (ix86_tune_string,
3397 processor_alias_table[i].name))
3399 ix86_schedule = processor_alias_table[i].schedule;
3400 ix86_tune = processor_alias_table[i].processor;
3403 error ("CPU you selected does not support x86-64 "
3409 /* Adjust tuning when compiling for 32-bit ABI. */
3412 case PROCESSOR_GENERIC64:
3413 ix86_tune = PROCESSOR_GENERIC32;
3414 ix86_schedule = CPU_PENTIUMPRO;
3417 case PROCESSOR_CORE2_64:
3418 ix86_tune = PROCESSOR_CORE2_32;
3421 case PROCESSOR_COREI7_64:
3422 ix86_tune = PROCESSOR_COREI7_32;
3429 /* Intel CPUs have always interpreted SSE prefetch instructions as
3430 NOPs; so, we can enable SSE prefetch instructions even when
3431 -mtune (rather than -march) points us to a processor that has them.
3432 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3433 higher processors. */
3435 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3436 x86_prefetch_sse = true;
3440 if (ix86_tune_specified && i == pta_size)
3441 error ("bad value (%s) for %stune=%s %s",
3442 ix86_tune_string, prefix, suffix, sw);
3444 ix86_tune_mask = 1u << ix86_tune;
3445 for (i = 0; i < X86_TUNE_LAST; ++i)
3446 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3448 #ifndef USE_IX86_FRAME_POINTER
3449 #define USE_IX86_FRAME_POINTER 0
3452 #ifndef USE_X86_64_FRAME_POINTER
3453 #define USE_X86_64_FRAME_POINTER 0
3456 /* Set the default values for switches whose default depends on TARGET_64BIT
3457 in case they weren't overwritten by command line options. */
3460 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3461 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3462 if (flag_asynchronous_unwind_tables == 2)
3463 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3464 if (flag_pcc_struct_return == 2)
3465 flag_pcc_struct_return = 0;
3469 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3470 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3471 if (flag_asynchronous_unwind_tables == 2)
3472 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3473 if (flag_pcc_struct_return == 2)
3474 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3478 ix86_cost = &ix86_size_cost;
3480 ix86_cost = processor_target_table[ix86_tune].cost;
3482 /* Arrange to set up i386_stack_locals for all functions. */
3483 init_machine_status = ix86_init_machine_status;
3485 /* Validate -mregparm= value. */
3486 if (global_options_set.x_ix86_regparm)
3489 warning (0, "-mregparm is ignored in 64-bit mode");
3490 if (ix86_regparm > REGPARM_MAX)
3492 error ("-mregparm=%d is not between 0 and %d",
3493 ix86_regparm, REGPARM_MAX);
3498 ix86_regparm = REGPARM_MAX;
3500 /* Default align_* from the processor table. */
3501 if (align_loops == 0)
3503 align_loops = processor_target_table[ix86_tune].align_loop;
3504 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3506 if (align_jumps == 0)
3508 align_jumps = processor_target_table[ix86_tune].align_jump;
3509 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3511 if (align_functions == 0)
3513 align_functions = processor_target_table[ix86_tune].align_func;
3516 /* Provide default for -mbranch-cost= value. */
3517 if (!global_options_set.x_ix86_branch_cost)
3518 ix86_branch_cost = ix86_cost->branch_cost;
3522 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3524 /* Enable by default the SSE and MMX builtins. Do allow the user to
3525 explicitly disable any of these. In particular, disabling SSE and
3526 MMX for kernel code is extremely useful. */
3527 if (!ix86_arch_specified)
3529 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3530 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3533 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3537 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3539 if (!ix86_arch_specified)
3541 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3543 /* The i386 ABI does not specify a red zone. It still makes sense to use
3544 it when the programmer takes care to keep the stack from being destroyed. */
3545 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3546 target_flags |= MASK_NO_RED_ZONE;
3549 /* Keep nonleaf frame pointers. */
3550 if (flag_omit_frame_pointer)
3551 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3552 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3553 flag_omit_frame_pointer = 1;
3555 /* If we're doing fast math, we don't care about comparison order
3556 wrt NaNs. This lets us use a shorter comparison sequence. */
3557 if (flag_finite_math_only)
3558 target_flags &= ~MASK_IEEE_FP;
3560 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3561 since the insns won't need emulation. */
3562 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3563 target_flags &= ~MASK_NO_FANCY_MATH_387;
3565 /* Likewise, if the target doesn't have a 387, or we've specified
3566 software floating point, don't use 387 inline intrinsics. */
3568 target_flags |= MASK_NO_FANCY_MATH_387;
3570 /* Turn on MMX builtins for -msse. */
3573 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3574 x86_prefetch_sse = true;
3577 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3578 if (TARGET_SSE4_2 || TARGET_ABM)
3579 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3581 /* Turn on lzcnt instruction for -mabm. */
3583 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3585 /* Validate -mpreferred-stack-boundary= value or default it to
3586 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3587 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3588 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3590 int min = (TARGET_64BIT ? 4 : 2);
3591 int max = (TARGET_SEH ? 4 : 12);
3593 if (ix86_preferred_stack_boundary_arg < min
3594 || ix86_preferred_stack_boundary_arg > max)
3597 error ("-mpreferred-stack-boundary is not supported "
3600 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3601 ix86_preferred_stack_boundary_arg, min, max);
3604 ix86_preferred_stack_boundary
3605 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
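/* Worked example: with the usual BITS_PER_UNIT of 8,
   -mpreferred-stack-boundary=4 yields (1 << 4) * 8 = 128 bits,
   i.e. a 16-byte-aligned stack. */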
3608 /* Set the default value for -mstackrealign. */
3609 if (ix86_force_align_arg_pointer == -1)
3610 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3612 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3614 /* Validate -mincoming-stack-boundary= value or default it to
3615 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3616 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3617 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3619 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3620 || ix86_incoming_stack_boundary_arg > 12)
3621 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3622 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3625 ix86_user_incoming_stack_boundary
3626 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3627 ix86_incoming_stack_boundary
3628 = ix86_user_incoming_stack_boundary;
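/* Roughly speaking, the incoming boundary describes the alignment our
   callers guarantee on function entry, while the preferred boundary above
   is what we maintain for our own outgoing calls; when the two differ,
   stack realignment may be required. */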
3632 /* Accept -msseregparm only if at least SSE support is enabled. */
3633 if (TARGET_SSEREGPARM
3635 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3637 if (global_options_set.x_ix86_fpmath)
3639 if (ix86_fpmath & FPMATH_SSE)
3643 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3644 ix86_fpmath = FPMATH_387;
3646 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3648 warning (0, "387 instruction set disabled, using SSE arithmetic");
3649 ix86_fpmath = FPMATH_SSE;
3654 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3656 /* If the i387 is disabled, then do not return values in it. */
3658 target_flags &= ~MASK_FLOAT_RETURNS;
3660 /* Use an external vectorized library when vectorizing intrinsics. */
3661 if (global_options_set.x_ix86_veclibabi_type)
3662 switch (ix86_veclibabi_type)
3664 case ix86_veclibabi_type_svml:
3665 ix86_veclib_handler = ix86_veclibabi_svml;
3668 case ix86_veclibabi_type_acml:
3669 ix86_veclib_handler = ix86_veclibabi_acml;
3676 if ((!USE_IX86_FRAME_POINTER
3677 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3678 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3680 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3682 /* ??? Unwind info is not correct around the CFG unless either a frame
3683 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3684 unwind info generation to be aware of the CFG and propagating states
3686 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3687 || flag_exceptions || flag_non_call_exceptions)
3688 && flag_omit_frame_pointer
3689 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3691 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3692 warning (0, "unwind tables currently require either a frame pointer "
3693 "or %saccumulate-outgoing-args%s for correctness",
3695 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3698 /* If stack probes are required, the space used for large function
3699 arguments on the stack must also be probed, so enable
3700 -maccumulate-outgoing-args so this happens in the prologue. */
3701 if (TARGET_STACK_PROBE
3702 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3704 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3705 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3706 "for correctness", prefix, suffix);
3707 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3710 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3713 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3714 p = strchr (internal_label_prefix, 'X');
3715 internal_label_prefix_len = p - internal_label_prefix;
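/* Illustrative: on typical ELF targets the generated name looks like
   ".LX0" (sometimes with a leading '*' verbatim marker), so everything
   before the 'X' is captured here as the internal-label prefix. */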
3719 /* When the scheduling description is not available, disable the scheduler
3720 pass so it won't slow down compilation and make x87 code slower. */
3721 if (!TARGET_SCHEDULE)
3722 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3724 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3725 ix86_cost->simultaneous_prefetches,
3726 global_options.x_param_values,
3727 global_options_set.x_param_values);
3728 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3729 global_options.x_param_values,
3730 global_options_set.x_param_values);
3731 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3732 global_options.x_param_values,
3733 global_options_set.x_param_values);
3734 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3735 global_options.x_param_values,
3736 global_options_set.x_param_values);
3738 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3739 if (flag_prefetch_loop_arrays < 0
3742 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3743 flag_prefetch_loop_arrays = 1;
3745 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3746 can be optimized to ap = __builtin_next_arg (0). */
3747 if (!TARGET_64BIT && !flag_split_stack)
3748 targetm.expand_builtin_va_start = NULL;
3752 ix86_gen_leave = gen_leave_rex64;
3753 ix86_gen_add3 = gen_adddi3;
3754 ix86_gen_sub3 = gen_subdi3;
3755 ix86_gen_sub3_carry = gen_subdi3_carry;
3756 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3757 ix86_gen_monitor = gen_sse3_monitor64;
3758 ix86_gen_andsp = gen_anddi3;
3759 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3760 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3761 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3765 ix86_gen_leave = gen_leave;
3766 ix86_gen_add3 = gen_addsi3;
3767 ix86_gen_sub3 = gen_subsi3;
3768 ix86_gen_sub3_carry = gen_subsi3_carry;
3769 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3770 ix86_gen_monitor = gen_sse3_monitor;
3771 ix86_gen_andsp = gen_andsi3;
3772 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3773 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3774 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
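/* These ix86_gen_* hooks select the DImode or SImode insn builders once,
   so later expansion code can emit pointer-sized operations without
   re-testing TARGET_64BIT at every use. */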
3778 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3780 target_flags |= MASK_CLD & ~target_flags_explicit;
3783 if (!TARGET_64BIT && flag_pic)
3785 if (flag_fentry > 0)
3786 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3790 else if (TARGET_SEH)
3792 if (flag_fentry == 0)
3793 sorry ("-mno-fentry isn%'t compatible with SEH");
3796 else if (flag_fentry < 0)
3798 #if defined(PROFILE_BEFORE_PROLOGUE)
3807 /* When not optimizing for size, enable the vzeroupper optimization for
3808 TARGET_AVX with -fexpensive-optimizations, and split 32-byte
3809 AVX unaligned loads/stores. */
3812 if (flag_expensive_optimizations
3813 && !(target_flags_explicit & MASK_VZEROUPPER))
3814 target_flags |= MASK_VZEROUPPER;
3815 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3816 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3817 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3818 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3819 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3820 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3821 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3822 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3823 target_flags |= MASK_PREFER_AVX128;
3828 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3829 target_flags &= ~MASK_VZEROUPPER;
3832 if (ix86_recip_name)
3834 char *p = ASTRDUP (ix86_recip_name);
3836 unsigned int mask, i;
3839 while ((q = strtok (p, ",")) != NULL)
3850 if (!strcmp (q, "default"))
3851 mask = RECIP_MASK_ALL;
3854 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
3855 if (!strcmp (q, recip_options[i].string))
3857 mask = recip_options[i].mask;
3861 if (i == ARRAY_SIZE (recip_options))
3863 error ("unknown option for -mrecip=%s", q);
3865 mask = RECIP_MASK_NONE;
3869 recip_mask_explicit |= mask;
3871 recip_mask &= ~mask;
3878 recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
3879 else if (target_flags_explicit & MASK_RECIP)
3880 recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
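/* Usage sketch (assuming the spellings listed in recip_options[]): plain
   -mrecip enables every reciprocal approximation not explicitly overridden,
   while -mrecip=<opt>[,<opt>...] toggles individual entries via the strtok
   loop above; "default" selects RECIP_MASK_ALL. */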
3882 /* Save the initial options in case the user does function specific
3885 target_option_default_node = target_option_current_node
3886 = build_target_option_node ();
3889 /* Return TRUE if VAL is passed in a register with 256bit AVX modes. */
3892 function_pass_avx256_p (const_rtx val)
3897 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3900 if (GET_CODE (val) == PARALLEL)
3905 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3907 r = XVECEXP (val, 0, i);
3908 if (GET_CODE (r) == EXPR_LIST
3910 && REG_P (XEXP (r, 0))
3911 && (GET_MODE (XEXP (r, 0)) == OImode
3912 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3920 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3923 ix86_option_override (void)
3925 ix86_option_override_internal (true);
3928 /* Update register usage after having seen the compiler flags. */
3931 ix86_conditional_register_usage (void)
3936 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3938 if (fixed_regs[i] > 1)
3939 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3940 if (call_used_regs[i] > 1)
3941 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3944 /* The PIC register, if it exists, is fixed. */
3945 j = PIC_OFFSET_TABLE_REGNUM;
3946 if (j != INVALID_REGNUM)
3947 fixed_regs[j] = call_used_regs[j] = 1;
3949 /* The 64-bit MS_ABI changes the set of call-used registers. */
3950 if (TARGET_64BIT_MS_ABI)
3952 call_used_regs[SI_REG] = 0;
3953 call_used_regs[DI_REG] = 0;
3954 call_used_regs[XMM6_REG] = 0;
3955 call_used_regs[XMM7_REG] = 0;
3956 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3957 call_used_regs[i] = 0;
3960 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3961 other call-clobbered regs for 64-bit. */
3964 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3966 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3967 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3968 && call_used_regs[i])
3969 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3972 /* If MMX is disabled, squash the registers. */
3974 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3975 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3976 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3978 /* If SSE is disabled, squash the registers. */
3980 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3981 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3982 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3984 /* If the FPU is disabled, squash the registers. */
3985 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3986 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3987 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3988 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
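/* In each of the three squashing loops above, marking a register both
   fixed and call-used while clearing its name removes it from register
   allocation and from assembler output entirely. */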
3990 /* If 32-bit, squash the 64-bit registers. */
3993 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3995 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4001 /* Save the current options. */
4004 ix86_function_specific_save (struct cl_target_option *ptr)
4006 ptr->arch = ix86_arch;
4007 ptr->schedule = ix86_schedule;
4008 ptr->tune = ix86_tune;
4009 ptr->branch_cost = ix86_branch_cost;
4010 ptr->tune_defaulted = ix86_tune_defaulted;
4011 ptr->arch_specified = ix86_arch_specified;
4012 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4013 ptr->ix86_target_flags_explicit = target_flags_explicit;
4014 ptr->x_recip_mask_explicit = recip_mask_explicit;
4016 /* The fields are char but the variables are not; make sure the
4017 values fit in the fields. */
4018 gcc_assert (ptr->arch == ix86_arch);
4019 gcc_assert (ptr->schedule == ix86_schedule);
4020 gcc_assert (ptr->tune == ix86_tune);
4021 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4024 /* Restore the current options. */
4027 ix86_function_specific_restore (struct cl_target_option *ptr)
4029 enum processor_type old_tune = ix86_tune;
4030 enum processor_type old_arch = ix86_arch;
4031 unsigned int ix86_arch_mask, ix86_tune_mask;
4034 ix86_arch = (enum processor_type) ptr->arch;
4035 ix86_schedule = (enum attr_cpu) ptr->schedule;
4036 ix86_tune = (enum processor_type) ptr->tune;
4037 ix86_branch_cost = ptr->branch_cost;
4038 ix86_tune_defaulted = ptr->tune_defaulted;
4039 ix86_arch_specified = ptr->arch_specified;
4040 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4041 target_flags_explicit = ptr->ix86_target_flags_explicit;
4042 recip_mask_explicit = ptr->x_recip_mask_explicit;
4044 /* Recreate the arch feature tests if the arch changed. */
4045 if (old_arch != ix86_arch)
4047 ix86_arch_mask = 1u << ix86_arch;
4048 for (i = 0; i < X86_ARCH_LAST; ++i)
4049 ix86_arch_features[i]
4050 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4053 /* Recreate the tune optimization tests. */
4054 if (old_tune != ix86_tune)
4056 ix86_tune_mask = 1u << ix86_tune;
4057 for (i = 0; i < X86_TUNE_LAST; ++i)
4058 ix86_tune_features[i]
4059 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4063 /* Print the current options. */
4066 ix86_function_specific_print (FILE *file, int indent,
4067 struct cl_target_option *ptr)
4070 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4071 NULL, NULL, ptr->x_ix86_fpmath, false);
4073 fprintf (file, "%*sarch = %d (%s)\n",
4076 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4077 ? cpu_names[ptr->arch]
4080 fprintf (file, "%*stune = %d (%s)\n",
4083 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4084 ? cpu_names[ptr->tune]
4087 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4091 fprintf (file, "%*s%s\n", indent, "", target_string);
4092 free (target_string);
4097 /* Inner function to process the attribute((target(...))); it takes an argument and
4098 sets the current options from the argument. If we have a list, recursively go
4102 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4103 struct gcc_options *enum_opts_set)
4108 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4109 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4110 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4111 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4112 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4128 enum ix86_opt_type type;
4133 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4134 IX86_ATTR_ISA ("abm", OPT_mabm),
4135 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4136 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4137 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4138 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4139 IX86_ATTR_ISA ("aes", OPT_maes),
4140 IX86_ATTR_ISA ("avx", OPT_mavx),
4141 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4142 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4143 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4144 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4145 IX86_ATTR_ISA ("sse", OPT_msse),
4146 IX86_ATTR_ISA ("sse2", OPT_msse2),
4147 IX86_ATTR_ISA ("sse3", OPT_msse3),
4148 IX86_ATTR_ISA ("sse4", OPT_msse4),
4149 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4150 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4151 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4152 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4153 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4154 IX86_ATTR_ISA ("fma", OPT_mfma),
4155 IX86_ATTR_ISA ("xop", OPT_mxop),
4156 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4157 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4158 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4159 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4162 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4164 /* string options */
4165 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4166 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4169 IX86_ATTR_YES ("cld",
4173 IX86_ATTR_NO ("fancy-math-387",
4174 OPT_mfancy_math_387,
4175 MASK_NO_FANCY_MATH_387),
4177 IX86_ATTR_YES ("ieee-fp",
4181 IX86_ATTR_YES ("inline-all-stringops",
4182 OPT_minline_all_stringops,
4183 MASK_INLINE_ALL_STRINGOPS),
4185 IX86_ATTR_YES ("inline-stringops-dynamically",
4186 OPT_minline_stringops_dynamically,
4187 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4189 IX86_ATTR_NO ("align-stringops",
4190 OPT_mno_align_stringops,
4191 MASK_NO_ALIGN_STRINGOPS),
4193 IX86_ATTR_YES ("recip",
4199 /* If this is a list, recurse to get the options. */
4200 if (TREE_CODE (args) == TREE_LIST)
4204 for (; args; args = TREE_CHAIN (args))
4205 if (TREE_VALUE (args)
4206 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4207 p_strings, enum_opts_set))
4213 else if (TREE_CODE (args) != STRING_CST)
4216 /* Handle multiple arguments separated by commas. */
4217 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4219 while (next_optstr && *next_optstr != '\0')
4221 char *p = next_optstr;
4223 char *comma = strchr (next_optstr, ',');
4224 const char *opt_string;
4225 size_t len, opt_len;
4230 enum ix86_opt_type type = ix86_opt_unknown;
4236 len = comma - next_optstr;
4237 next_optstr = comma + 1;
4245 /* Recognize no-xxx. */
4246 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4255 /* Find the option. */
4258 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4260 type = attrs[i].type;
4261 opt_len = attrs[i].len;
4262 if (ch == attrs[i].string[0]
4263 && ((type != ix86_opt_str && type != ix86_opt_enum)
4266 && memcmp (p, attrs[i].string, opt_len) == 0)
4269 mask = attrs[i].mask;
4270 opt_string = attrs[i].string;
4275 /* Process the option. */
4278 error ("attribute(target(\"%s\")) is unknown", orig_p);
4282 else if (type == ix86_opt_isa)
4284 struct cl_decoded_option decoded;
4286 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4287 ix86_handle_option (&global_options, &global_options_set,
4288 &decoded, input_location);
4291 else if (type == ix86_opt_yes || type == ix86_opt_no)
4293 if (type == ix86_opt_no)
4294 opt_set_p = !opt_set_p;
4297 target_flags |= mask;
4299 target_flags &= ~mask;
4302 else if (type == ix86_opt_str)
4306 error ("option(\"%s\") was already specified", opt_string);
4310 p_strings[opt] = xstrdup (p + opt_len);
4313 else if (type == ix86_opt_enum)
4318 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4320 set_option (&global_options, enum_opts_set, opt, value,
4321 p + opt_len, DK_UNSPECIFIED, input_location,
4325 error ("attribute(target(\"%s\")) is unknown", orig_p);
4337 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4340 ix86_valid_target_attribute_tree (tree args)
4342 const char *orig_arch_string = ix86_arch_string;
4343 const char *orig_tune_string = ix86_tune_string;
4344 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4345 int orig_tune_defaulted = ix86_tune_defaulted;
4346 int orig_arch_specified = ix86_arch_specified;
4347 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4350 struct cl_target_option *def
4351 = TREE_TARGET_OPTION (target_option_default_node);
4352 struct gcc_options enum_opts_set;
4354 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4356 /* Process each of the options on the chain. */
4357 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4361 /* If the changed options are different from the default, rerun
4362 ix86_option_override_internal, and then save the options away.
4363 The string options are attribute options, and will be undone
4364 when we copy the save structure. */
4365 if (ix86_isa_flags != def->x_ix86_isa_flags
4366 || target_flags != def->x_target_flags
4367 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4368 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4369 || enum_opts_set.x_ix86_fpmath)
4371 /* If we are using the default tune= or arch=, undo the string assigned,
4372 and use the default. */
4373 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4374 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4375 else if (!orig_arch_specified)
4376 ix86_arch_string = NULL;
4378 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4379 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4380 else if (orig_tune_defaulted)
4381 ix86_tune_string = NULL;
4383 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4384 if (enum_opts_set.x_ix86_fpmath)
4385 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4386 else if (!TARGET_64BIT && TARGET_SSE)
4388 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4389 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4392 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4393 ix86_option_override_internal (false);
4395 /* Add any builtin functions with the new isa if any. */
4396 ix86_add_new_builtins (ix86_isa_flags);
4398 /* Save the current options unless we are validating options for
4400 t = build_target_option_node ();
4402 ix86_arch_string = orig_arch_string;
4403 ix86_tune_string = orig_tune_string;
4404 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4406 /* Free up memory allocated to hold the strings */
4407 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4408 free (option_strings[i]);
4414 /* Hook to validate attribute((target("string"))). */
4417 ix86_valid_target_attribute_p (tree fndecl,
4418 tree ARG_UNUSED (name),
4420 int ARG_UNUSED (flags))
4422 struct cl_target_option cur_target;
4424 tree old_optimize = build_optimization_node ();
4425 tree new_target, new_optimize;
4426 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4428 /* If the function changed the optimization levels as well as setting target
4429 options, start with the optimizations specified. */
4430 if (func_optimize && func_optimize != old_optimize)
4431 cl_optimization_restore (&global_options,
4432 TREE_OPTIMIZATION (func_optimize));
4434 /* The target attributes may also change some optimization flags, so update
4435 the optimization options if necessary. */
4436 cl_target_option_save (&cur_target, &global_options);
4437 new_target = ix86_valid_target_attribute_tree (args);
4438 new_optimize = build_optimization_node ();
4445 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4447 if (old_optimize != new_optimize)
4448 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4451 cl_target_option_restore (&global_options, &cur_target);
4453 if (old_optimize != new_optimize)
4454 cl_optimization_restore (&global_options,
4455 TREE_OPTIMIZATION (old_optimize));
4461 /* Hook to determine if one function can safely inline another. */
4464 ix86_can_inline_p (tree caller, tree callee)
4467 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4468 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4470 /* If the callee has no option attributes, then it is ok to inline. */
4474 /* If the caller has no option attributes but the callee does, then it is not ok to
4476 else if (!caller_tree)
4481 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4482 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4484 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4 function
4485 can inline an SSE2 function, but an SSE2 function can't inline an SSE4
4487 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4488 != callee_opts->x_ix86_isa_flags)
4491 /* See if we have the same non-isa options. */
4492 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4495 /* See if arch, tune, etc. are the same. */
4496 else if (caller_opts->arch != callee_opts->arch)
4499 else if (caller_opts->tune != callee_opts->tune)
4502 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4505 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4516 /* Remember the last target of ix86_set_current_function. */
4517 static GTY(()) tree ix86_previous_fndecl;
4519 /* Establish appropriate back-end context for processing the function
4520 FNDECL. The argument might be NULL to indicate processing at top
4521 level, outside of any function scope. */
4523 ix86_set_current_function (tree fndecl)
4525 /* Only change the context if the function changes. This hook is called
4526 several times in the course of compiling a function, and we don't want to
4527 slow things down too much or call target_reinit when it isn't safe. */
4528 if (fndecl && fndecl != ix86_previous_fndecl)
4530 tree old_tree = (ix86_previous_fndecl
4531 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4534 tree new_tree = (fndecl
4535 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4538 ix86_previous_fndecl = fndecl;
4539 if (old_tree == new_tree)
4544 cl_target_option_restore (&global_options,
4545 TREE_TARGET_OPTION (new_tree));
4551 struct cl_target_option *def
4552 = TREE_TARGET_OPTION (target_option_current_node);
4554 cl_target_option_restore (&global_options, def);
4561 /* Return true if this goes in large data/bss. */
4564 ix86_in_large_data_p (tree exp)
4566 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4569 /* Functions are never large data. */
4570 if (TREE_CODE (exp) == FUNCTION_DECL)
4573 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4575 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4576 if (strcmp (section, ".ldata") == 0
4577 || strcmp (section, ".lbss") == 0)
4583 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4585 /* If this is an incomplete type with size 0, then we can't put it
4586 in data because it might be too big when completed. */
4587 if (!size || size > ix86_section_threshold)
4594 /* Switch to the appropriate section for output of DECL.
4595 DECL is either a `VAR_DECL' node or a constant of some sort.
4596 RELOC indicates whether forming the initial value of DECL requires
4597 link-time relocations. */
4599 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4603 x86_64_elf_select_section (tree decl, int reloc,
4604 unsigned HOST_WIDE_INT align)
4606 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4607 && ix86_in_large_data_p (decl))
4609 const char *sname = NULL;
4610 unsigned int flags = SECTION_WRITE;
4611 switch (categorize_decl_for_section (decl, reloc))
4616 case SECCAT_DATA_REL:
4617 sname = ".ldata.rel";
4619 case SECCAT_DATA_REL_LOCAL:
4620 sname = ".ldata.rel.local";
4622 case SECCAT_DATA_REL_RO:
4623 sname = ".ldata.rel.ro";
4625 case SECCAT_DATA_REL_RO_LOCAL:
4626 sname = ".ldata.rel.ro.local";
4630 flags |= SECTION_BSS;
4633 case SECCAT_RODATA_MERGE_STR:
4634 case SECCAT_RODATA_MERGE_STR_INIT:
4635 case SECCAT_RODATA_MERGE_CONST:
4639 case SECCAT_SRODATA:
4646 /* We don't split these for the medium model. Place them into
4647 default sections and hope for the best. */
4652 /* We might get called with string constants, but get_named_section
4653 doesn't like them as they are not DECLs. Also, we need to set
4654 flags in that case. */
4656 return get_section (sname, flags, NULL);
4657 return get_named_section (decl, sname, reloc);
4660 return default_elf_select_section (decl, reloc, align);
4663 /* Build up a unique section name, expressed as a
4664 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4665 RELOC indicates whether the initial value of EXP requires
4666 link-time relocations. */
4668 static void ATTRIBUTE_UNUSED
4669 x86_64_elf_unique_section (tree decl, int reloc)
4671 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4672 && ix86_in_large_data_p (decl))
4674 const char *prefix = NULL;
4675 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4676 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4678 switch (categorize_decl_for_section (decl, reloc))
4681 case SECCAT_DATA_REL:
4682 case SECCAT_DATA_REL_LOCAL:
4683 case SECCAT_DATA_REL_RO:
4684 case SECCAT_DATA_REL_RO_LOCAL:
4685 prefix = one_only ? ".ld" : ".ldata";
4688 prefix = one_only ? ".lb" : ".lbss";
4691 case SECCAT_RODATA_MERGE_STR:
4692 case SECCAT_RODATA_MERGE_STR_INIT:
4693 case SECCAT_RODATA_MERGE_CONST:
4694 prefix = one_only ? ".lr" : ".lrodata";
4696 case SECCAT_SRODATA:
4703 /* We don't split these for the medium model. Place them into
4704 default sections and hope for the best. */
4709 const char *name, *linkonce;
4712 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4713 name = targetm.strip_name_encoding (name);
4715 /* If we're using one_only, then there needs to be a .gnu.linkonce
4716 prefix to the section name. */
4717 linkonce = one_only ? ".gnu.linkonce" : "";
4719 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4721 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4725 default_unique_section (decl, reloc);
4728 #ifdef COMMON_ASM_OP
4729 /* This says how to output assembler code to declare an
4730 uninitialized external linkage data object.
4732 For medium-model x86-64 we need to use the .largecomm directive for
4735 x86_elf_aligned_common (FILE *file,
4736 const char *name, unsigned HOST_WIDE_INT size,
4739 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4740 && size > (unsigned int)ix86_section_threshold)
4741 fputs (".largecomm\t", file);
4743 fputs (COMMON_ASM_OP, file);
4744 assemble_name (file, name);
4745 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4746 size, align / BITS_PER_UNIT);
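/* For example, a 5000-byte object with 256-bit alignment that exceeds the
   section threshold under the medium code model comes out as
   ".largecomm name,5000,32" -- align is converted from bits to bytes. */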
4750 /* Utility function for targets to use in implementing
4751 ASM_OUTPUT_ALIGNED_BSS. */
4754 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4755 const char *name, unsigned HOST_WIDE_INT size,
4758 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4759 && size > (unsigned int)ix86_section_threshold)
4760 switch_to_section (get_named_section (decl, ".lbss", 0));
4762 switch_to_section (bss_section);
4763 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4764 #ifdef ASM_DECLARE_OBJECT_NAME
4765 last_assemble_variable_decl = decl;
4766 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4768 /* The standard thing is just to output a label for the object. */
4769 ASM_OUTPUT_LABEL (file, name);
4770 #endif /* ASM_DECLARE_OBJECT_NAME */
4771 ASM_OUTPUT_SKIP (file, size ? size : 1);
4774 /* Decide whether we must probe the stack before any space allocation
4775 on this target. It's essentially TARGET_STACK_PROBE except when
4776 -fstack-check causes the stack to be already probed differently. */
4779 ix86_target_stack_probe (void)
4781 /* Do not probe the stack twice if static stack checking is enabled. */
4782 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4785 return TARGET_STACK_PROBE;
4788 /* Decide whether we can make a sibling call to a function. DECL is the
4789 declaration of the function being targeted by the call and EXP is the
4790 CALL_EXPR representing the call. */
4793 ix86_function_ok_for_sibcall (tree decl, tree exp)
4795 tree type, decl_or_type;
4798 /* If we are generating position-independent code, we cannot sibcall
4799 optimize any indirect call, or a direct call to a global function,
4800 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4804 && (!decl || !targetm.binds_local_p (decl)))
4807 /* If we need to align the outgoing stack, then sibcalling would
4808 unalign the stack, which may break the called function. */
4809 if (ix86_minimum_incoming_stack_boundary (true)
4810 < PREFERRED_STACK_BOUNDARY)
4815 decl_or_type = decl;
4816 type = TREE_TYPE (decl);
4820 /* We're looking at the CALL_EXPR; we need the type of the function. */
4821 type = CALL_EXPR_FN (exp); /* pointer expression */
4822 type = TREE_TYPE (type); /* pointer type */
4823 type = TREE_TYPE (type); /* function type */
4824 decl_or_type = type;
4827 /* Check that the return value locations are the same. For example,
4828 if we are returning floats on the 80387 register stack, we cannot
4829 make a sibcall from a function that doesn't return a float to a
4830 function that does or, conversely, from a function that does return
4831 a float to a function that doesn't; the necessary stack adjustment
4832 would not be executed. This is also the place we notice
4833 differences in the return value ABI. Note that it is ok for one
4834 of the functions to have void return type as long as the return
4835 value of the other is passed in a register. */
4836 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4837 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4839 if (STACK_REG_P (a) || STACK_REG_P (b))
4841 if (!rtx_equal_p (a, b))
4844 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4846 /* Disable sibcall if we need to generate vzeroupper after
4848 if (TARGET_VZEROUPPER
4849 && cfun->machine->callee_return_avx256_p
4850 && !cfun->machine->caller_return_avx256_p)
4853 else if (!rtx_equal_p (a, b))
4858 /* The SYSV ABI has more call-clobbered registers;
4859 disallow sibcalls from MS to SYSV. */
4860 if (cfun->machine->call_abi == MS_ABI
4861 && ix86_function_type_abi (type) == SYSV_ABI)
4866 /* If this call is indirect, we'll need to be able to use a
4867 call-clobbered register for the address of the target function.
4868 Make sure that all such registers are not used for passing
4869 parameters. Note that DLLIMPORT functions are indirect. */
4871 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4873 if (ix86_function_regparm (type, NULL) >= 3)
4875 /* ??? Need to count the actual number of registers to be used,
4876 not the possible number of registers. Fix later. */
4882 /* Otherwise okay. That also includes certain types of indirect calls. */
4886 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4887 and "sseregparm" calling convention attributes;
4888 arguments as in struct attribute_spec.handler. */
4891 ix86_handle_cconv_attribute (tree *node, tree name,
4893 int flags ATTRIBUTE_UNUSED,
4896 if (TREE_CODE (*node) != FUNCTION_TYPE
4897 && TREE_CODE (*node) != METHOD_TYPE
4898 && TREE_CODE (*node) != FIELD_DECL
4899 && TREE_CODE (*node) != TYPE_DECL)
4901 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4903 *no_add_attrs = true;
4907 /* Can combine regparm with all attributes but fastcall, and thiscall. */
4908 if (is_attribute_p ("regparm", name))
4912 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4914 error ("fastcall and regparm attributes are not compatible");
4917 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4919 error ("regparam and thiscall attributes are not compatible");
4922 cst = TREE_VALUE (args);
4923 if (TREE_CODE (cst) != INTEGER_CST)
4925 warning (OPT_Wattributes,
4926 "%qE attribute requires an integer constant argument",
4928 *no_add_attrs = true;
4930 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4932 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4934 *no_add_attrs = true;
4942 /* Do not warn when emulating the MS ABI. */
4943 if ((TREE_CODE (*node) != FUNCTION_TYPE
4944 && TREE_CODE (*node) != METHOD_TYPE)
4945 || ix86_function_type_abi (*node) != MS_ABI)
4946 warning (OPT_Wattributes, "%qE attribute ignored",
4948 *no_add_attrs = true;
4952 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4953 if (is_attribute_p ("fastcall", name))
4955 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4957 error ("fastcall and cdecl attributes are not compatible");
4959 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4961 error ("fastcall and stdcall attributes are not compatible");
4963 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4965 error ("fastcall and regparm attributes are not compatible");
4967 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4969 error ("fastcall and thiscall attributes are not compatible");
4973 /* Can combine stdcall with fastcall (redundant), regparm and
4975 else if (is_attribute_p ("stdcall", name))
4977 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4979 error ("stdcall and cdecl attributes are not compatible");
4981 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4983 error ("stdcall and fastcall attributes are not compatible");
4985 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4987 error ("stdcall and thiscall attributes are not compatible");
4991 /* Can combine cdecl with regparm and sseregparm. */
4992 else if (is_attribute_p ("cdecl", name))
4994 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4996 error ("stdcall and cdecl attributes are not compatible");
4998 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5000 error ("fastcall and cdecl attributes are not compatible");
5002 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5004 error ("cdecl and thiscall attributes are not compatible");
5007 else if (is_attribute_p ("thiscall", name))
5009 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5010 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5012 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5014 error ("stdcall and thiscall attributes are not compatible");
5016 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5018 error ("fastcall and thiscall attributes are not compatible");
5020 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5022 error ("cdecl and thiscall attributes are not compatible");
5026 /* Can combine sseregparm with all attributes. */
5031 /* The transactional memory builtins are implicitly regparm or fastcall
5032 depending on the ABI. Override the generic do-nothing attribute that
5033 these builtins were declared with, and replace it with one of the two
5034 attributes that we expect elsewhere. */
5037 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5038 tree args ATTRIBUTE_UNUSED,
5039 int flags ATTRIBUTE_UNUSED,
5044 /* In no case do we want to add the placeholder attribute. */
5045 *no_add_attrs = true;
5047 /* The 64-bit ABI is unchanged for transactional memory. */
5051 /* ??? Is there a better way to validate 32-bit windows? We have
5052 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5053 if (CHECK_STACK_LIMIT > 0)
5054 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5057 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5058 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5060 decl_attributes (node, alt, flags);
5065 /* This function determines the calling convention from TYPE. */
5068 ix86_get_callcvt (const_tree type)
5070 unsigned int ret = 0;
5075 return IX86_CALLCVT_CDECL;
5077 attrs = TYPE_ATTRIBUTES (type);
5078 if (attrs != NULL_TREE)
5080 if (lookup_attribute ("cdecl", attrs))
5081 ret |= IX86_CALLCVT_CDECL;
5082 else if (lookup_attribute ("stdcall", attrs))
5083 ret |= IX86_CALLCVT_STDCALL;
5084 else if (lookup_attribute ("fastcall", attrs))
5085 ret |= IX86_CALLCVT_FASTCALL;
5086 else if (lookup_attribute ("thiscall", attrs))
5087 ret |= IX86_CALLCVT_THISCALL;
5089 /* Regparm isn't allowed for thiscall and fastcall. */
5090 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5092 if (lookup_attribute ("regparm", attrs))
5093 ret |= IX86_CALLCVT_REGPARM;
5094 if (lookup_attribute ("sseregparm", attrs))
5095 ret |= IX86_CALLCVT_SSEREGPARM;
5098 if (IX86_BASE_CALLCVT(ret) != 0)
5102 is_stdarg = stdarg_p (type);
5103 if (TARGET_RTD && !is_stdarg)
5104 return IX86_CALLCVT_STDCALL | ret;
5108 || TREE_CODE (type) != METHOD_TYPE
5109 || ix86_function_type_abi (type) != MS_ABI)
5110 return IX86_CALLCVT_CDECL | ret;
5112 return IX86_CALLCVT_THISCALL;
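/* Example: with no attributes, no -mrtd, and a plain FUNCTION_TYPE such
   as `int f (int);', the code above falls through to IX86_CALLCVT_CDECL. */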
5115 /* Return 0 if the attributes for two types are incompatible, 1 if they
5116 are compatible, and 2 if they are nearly compatible (which causes a
5117 warning to be generated). */
5120 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5122 unsigned int ccvt1, ccvt2;
5124 if (TREE_CODE (type1) != FUNCTION_TYPE
5125 && TREE_CODE (type1) != METHOD_TYPE)
5128 ccvt1 = ix86_get_callcvt (type1);
5129 ccvt2 = ix86_get_callcvt (type2);
5132 if (ix86_function_regparm (type1, NULL)
5133 != ix86_function_regparm (type2, NULL))
5139 /* Return the regparm value for a function with the indicated TYPE and DECL.
5140 DECL may be NULL when calling function indirectly
5141 or considering a libcall. */
5144 ix86_function_regparm (const_tree type, const_tree decl)
5151 return (ix86_function_type_abi (type) == SYSV_ABI
5152 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5153 ccvt = ix86_get_callcvt (type);
5154 regparm = ix86_regparm;
5156 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5158 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5161 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5165 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5167 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5170 /* Use register calling convention for local functions when possible. */
5172 && TREE_CODE (decl) == FUNCTION_DECL
5174 && !(profile_flag && !flag_fentry))
5176 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5177 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5178 if (i && i->local && i->can_change_signature)
5180 int local_regparm, globals = 0, regno;
5182 /* Make sure no regparm register is taken by a
5183 fixed register variable. */
5184 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5185 if (fixed_regs[local_regparm])
5188 /* We don't want to use regparm(3) for nested functions as
5189 these use a static chain pointer in the third argument. */
5190 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5193 /* In 32-bit mode save a register for the split stack. */
5194 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5197 /* Each fixed register usage increases register pressure,
5198 so fewer registers should be used for argument passing.
5199 This functionality can be overridden by an explicit
5201 for (regno = 0; regno <= DI_REG; regno++)
5202 if (fixed_regs[regno])
5206 = globals < local_regparm ? local_regparm - globals : 0;
5208 if (local_regparm > regparm)
5209 regparm = local_regparm;
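/* Illustrative: when the logic above settles on 3 for a 32-bit local
   function, the first integer arguments travel in %eax, %edx and %ecx,
   just as with an explicit __attribute__((regparm(3))). */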
5216 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5217 DFmode (2) arguments in SSE registers for a function with the
5218 indicated TYPE and DECL. DECL may be NULL when calling function
5219 indirectly or considering a libcall. Otherwise return 0. */
5222 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5224 gcc_assert (!TARGET_64BIT);
5226 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5227 by the sseregparm attribute. */
5228 if (TARGET_SSEREGPARM
5229 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5236 error ("calling %qD with attribute sseregparm without "
5237 "SSE/SSE2 enabled", decl);
5239 error ("calling %qT with attribute sseregparm without "
5240 "SSE/SSE2 enabled", type);
5248 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5249 (and DFmode for SSE2) arguments in SSE registers. */
5250 if (decl && TARGET_SSE_MATH && optimize
5251 && !(profile_flag && !flag_fentry))
5253 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5254 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5255 if (i && i->local && i->can_change_signature)
5256 return TARGET_SSE2 ? 2 : 1;
5262 /* Return true if EAX is live at the start of the function. Used by
5263 ix86_expand_prologue to determine if we need special help before
5264 calling allocate_stack_worker. */
5267 ix86_eax_live_at_start_p (void)
5269 /* Cheat. Don't bother working forward from ix86_function_regparm
5270 to the function type to whether an actual argument is located in
5271 eax. Instead just look at cfg info, which is still close enough
5272 to correct at this point. This gives false positives for broken
5273 functions that might use uninitialized data that happens to be
5274 allocated in eax, but who cares? */
5275 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5279 ix86_keep_aggregate_return_pointer (tree fntype)
5285 attr = lookup_attribute ("callee_pop_aggregate_return",
5286 TYPE_ATTRIBUTES (fntype));
5288 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5290 /* For 32-bit MS-ABI the default is to keep aggregate
5292 if (ix86_function_type_abi (fntype) == MS_ABI)
5295 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5298 /* Value is the number of bytes of arguments automatically
5299 popped when returning from a subroutine call.
5300 FUNDECL is the declaration node of the function (as a tree),
5301 FUNTYPE is the data type of the function (as a tree),
5302 or for a library call it is an identifier node for the subroutine name.
5303 SIZE is the number of bytes of arguments passed on the stack.
5305 On the 80386, the RTD insn may be used to pop them if the number
5306 of args is fixed, but if the number is variable then the caller
5307 must pop them all. RTD can't be used for library calls now
5308 because the library is compiled with the Unix compiler.
5309 Use of RTD is a selectable option, since it is incompatible with
5310 standard Unix calling sequences. If the option is not selected,
5311 the caller must always pop the args.
5313 The attribute stdcall is equivalent to RTD on a per module basis. */
5316 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5320 /* None of the 64-bit ABIs pop arguments. */
5324 ccvt = ix86_get_callcvt (funtype);
5326 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5327 | IX86_CALLCVT_THISCALL)) != 0
5328 && ! stdarg_p (funtype))
5331 /* Lose any fake structure return argument if it is passed on the stack. */
5332 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5333 && !ix86_keep_aggregate_return_pointer (funtype))
5335 int nregs = ix86_function_regparm (funtype, fundecl);
5337 return GET_MODE_SIZE (Pmode);
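/* e.g. on a 32-bit target this pops the 4-byte hidden aggregate-return
   pointer (GET_MODE_SIZE (Pmode) == 4) on behalf of the caller. */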
5343 /* Argument support functions. */
5345 /* Return true when a register may be used to pass function parameters. */
5347 ix86_function_arg_regno_p (int regno)
5350 const int *parm_regs;
5355 return (regno < REGPARM_MAX
5356 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5358 return (regno < REGPARM_MAX
5359 || (TARGET_MMX && MMX_REGNO_P (regno)
5360 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5361 || (TARGET_SSE && SSE_REGNO_P (regno)
5362 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5367 if (SSE_REGNO_P (regno) && TARGET_SSE)
5372 if (TARGET_SSE && SSE_REGNO_P (regno)
5373 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5377 /* TODO: The function should depend on current function ABI but
5378 builtins.c would need updating then. Therefore we use the
5381 /* RAX is used as hidden argument to va_arg functions. */
5382 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5385 if (ix86_abi == MS_ABI)
5386 parm_regs = x86_64_ms_abi_int_parameter_registers;
5388 parm_regs = x86_64_int_parameter_registers;
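/* For reference: the SysV x86-64 integer parameter registers are
   RDI, RSI, RDX, RCX, R8 and R9, while the 64-bit MS ABI uses only
   RCX, RDX, R8 and R9 -- hence the two REGPARM_MAX limits below. */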
5389 for (i = 0; i < (ix86_abi == MS_ABI
5390 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5391 if (regno == parm_regs[i])
5396 /* Return true if we do not know how to pass TYPE solely in registers. */
5399 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5401 if (must_pass_in_stack_var_size_or_pad (mode, type))
5404 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5405 The layout_type routine is crafty and tries to trick us into passing
5406 currently unsupported vector types on the stack by using TImode. */
5407 return (!TARGET_64BIT && mode == TImode
5408 && type && TREE_CODE (type) != VECTOR_TYPE);
5411 /* Return the size, in bytes, of the area reserved for arguments passed
5412 in registers for the function represented by FNDECL, depending on the used
5415 ix86_reg_parm_stack_space (const_tree fndecl)
5417 enum calling_abi call_abi = SYSV_ABI;
5418 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5419 call_abi = ix86_function_abi (fndecl);
5421 call_abi = ix86_function_type_abi (fndecl);
5422 if (TARGET_64BIT && call_abi == MS_ABI)
5427 /* Returns SYSV_ABI or MS_ABI depending on FNTYPE, specifying the
5430 ix86_function_type_abi (const_tree fntype)
5432 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5434 enum calling_abi abi = ix86_abi;
5435 if (abi == SYSV_ABI)
5437 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5440 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5448 ix86_function_ms_hook_prologue (const_tree fn)
5450 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5452 if (decl_function_context (fn) != NULL_TREE)
5453 error_at (DECL_SOURCE_LOCATION (fn),
5454 "ms_hook_prologue is not compatible with nested function");
5461 static enum calling_abi
5462 ix86_function_abi (const_tree fndecl)
5466 return ix86_function_type_abi (TREE_TYPE (fndecl));
5469 /* Returns SYSV_ABI or MS_ABI depending on cfun, specifying the
5472 ix86_cfun_abi (void)
5476 return cfun->machine->call_abi;
5479 /* Write the extra assembler code needed to declare a function properly. */
5482 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5485 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5489 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5490 unsigned int filler_cc = 0xcccccccc;
5492 for (i = 0; i < filler_count; i += 4)
5493 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
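/* The 0xcc filler bytes are x86 `int3' breakpoint opcodes; emitting 16
   (32-bit) or 32 (64-bit) of them ahead of the label leaves room that
   hot-patching tools can later overwrite with a jump. */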
5496 #ifdef SUBTARGET_ASM_UNWIND_INIT
5497 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5500 ASM_OUTPUT_LABEL (asm_out_file, fname);
5502 /* Output magic byte marker, if hot-patch attribute is set. */
5507 /* leaq [%rsp + 0], %rsp */
5508 asm_fprintf (asm_out_file, ASM_BYTE
5509 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5513 /* movl.s %edi, %edi
5515 movl.s %esp, %ebp */
5516 asm_fprintf (asm_out_file, ASM_BYTE
5517 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5523 extern void init_regs (void);
5525 /* Implementation of the call ABI switching target hook. The call
5526 register sets specific to FNDECL are set up. See also
5527 ix86_conditional_register_usage for more details. */
5529 ix86_call_abi_override (const_tree fndecl)
5531 if (fndecl == NULL_TREE)
5532 cfun->machine->call_abi = ix86_abi;
5534 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5537 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
5538 expensive re-initialization of init_regs each time we switch function context,
5539 since this is needed only during RTL expansion. */
5541 ix86_maybe_switch_abi (void)
5543 if (TARGET_64BIT &&
5544 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5548 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5549 for a call to a function whose data type is FNTYPE.
5550 For a library call, FNTYPE is 0. */
5553 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5554 tree fntype, /* tree ptr for function decl */
5555 rtx libname, /* SYMBOL_REF of library name or 0 */
5559 struct cgraph_local_info *i;
5562 memset (cum, 0, sizeof (*cum));
5564 /* Initialize for the current callee. */
5567 cfun->machine->callee_pass_avx256_p = false;
5568 cfun->machine->callee_return_avx256_p = false;
5573 i = cgraph_local_info (fndecl);
5574 cum->call_abi = ix86_function_abi (fndecl);
5575 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5580 cum->call_abi = ix86_function_type_abi (fntype);
5582 fnret_type = TREE_TYPE (fntype);
5587 if (TARGET_VZEROUPPER && fnret_type)
5589 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5591 if (function_pass_avx256_p (fnret_value))
5593 /* The return value of this function uses 256bit AVX modes. */
5595 cfun->machine->callee_return_avx256_p = true;
5597 cfun->machine->caller_return_avx256_p = true;
5601 cum->caller = caller;
5603 /* Set up the number of registers to use for passing arguments. */
5605 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5606 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5607 "or subtarget optimization implying it");
5608 cum->nregs = ix86_regparm;
5611 cum->nregs = (cum->call_abi == SYSV_ABI
5612 ? X86_64_REGPARM_MAX
5613 : X86_64_MS_REGPARM_MAX);
5617 cum->sse_nregs = SSE_REGPARM_MAX;
5620 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5621 ? X86_64_SSE_REGPARM_MAX
5622 : X86_64_MS_SSE_REGPARM_MAX);
5626 cum->mmx_nregs = MMX_REGPARM_MAX;
5627 cum->warn_avx = true;
5628 cum->warn_sse = true;
5629 cum->warn_mmx = true;
5631 /* Because types might mismatch between caller and callee, we need to
5632 use the actual type of the function for local calls.
5633 FIXME: cgraph_analyze can be told to actually record if a function uses
5634 va_start, so for local functions maybe_vaarg can be made aggressive.
5636 FIXME: once the type system is fixed, we won't need this code anymore. */
5637 if (i && i->local && i->can_change_signature)
5638 fntype = TREE_TYPE (fndecl);
5639 cum->maybe_vaarg = (fntype
5640 ? (!prototype_p (fntype) || stdarg_p (fntype))
5645 /* If there are variable arguments, then we won't pass anything
5646 in registers in 32-bit mode. */
5647 if (stdarg_p (fntype))
5658 /* Use the ecx and edx registers if the function has the fastcall attribute,
5659 else look for regparm information. */
5662 unsigned int ccvt = ix86_get_callcvt (fntype);
5663 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5666 cum->fastcall = 1; /* Same first register as in fastcall. */
5668 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5674 cum->nregs = ix86_function_regparm (fntype, fndecl);
5677 /* Set up the number of SSE registers used for passing SFmode
5678 and DFmode arguments. Warn for mismatching ABI. */
5679 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5683 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5684 But in the case of vector types, it is some vector mode.
5686 When we have only some of our vector isa extensions enabled, then there
5687 are some modes for which vector_mode_supported_p is false. For these
5688 modes, the generic vector support in gcc will choose some non-vector mode
5689 in order to implement the type. By computing the natural mode, we'll
5690 select the proper ABI location for the operand and not depend on whatever
5691 the middle-end decides to do with these vector types.
5693 The middle-end can't deal with vector types larger than 16 bytes. In this
5694 case, we return the original mode and warn about the ABI change if CUM isn't NULL. */
5697 static enum machine_mode
5698 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5700 enum machine_mode mode = TYPE_MODE (type);
5702 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5704 HOST_WIDE_INT size = int_size_in_bytes (type);
5705 if ((size == 8 || size == 16 || size == 32)
5706 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5707 && TYPE_VECTOR_SUBPARTS (type) > 1)
5709 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5711 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5712 mode = MIN_MODE_VECTOR_FLOAT;
5714 mode = MIN_MODE_VECTOR_INT;
5716 /* Get the mode which has this inner mode and number of units. */
5717 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5718 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5719 && GET_MODE_INNER (mode) == innermode)
5721 if (size == 32 && !TARGET_AVX)
5723 static bool warnedavx;
5730 warning (0, "AVX vector argument without AVX "
5731 "enabled changes the ABI");
5733 return TYPE_MODE (type);
5746 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5747 this may not agree with the mode that the type system has chosen for the
5748 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5749 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5752 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5757 if (orig_mode != BLKmode)
5758 tmp = gen_rtx_REG (orig_mode, regno);
5761 tmp = gen_rtx_REG (mode, regno);
5762 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5763 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5769 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
5770 goal of this code is to classify each eightbyte of the incoming argument by
5771 register class and assign registers accordingly. */
5773 /* Return the union class of CLASS1 and CLASS2.
5774 See the x86-64 PS ABI for details. */
5776 static enum x86_64_reg_class
5777 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5779 /* Rule #1: If both classes are equal, this is the resulting class. */
5780 if (class1 == class2)
5783 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is the other class. */
5785 if (class1 == X86_64_NO_CLASS)
5787 if (class2 == X86_64_NO_CLASS)
5790 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5791 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5792 return X86_64_MEMORY_CLASS;
5794 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5795 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5796 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5797 return X86_64_INTEGERSI_CLASS;
5798 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5799 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5800 return X86_64_INTEGER_CLASS;
5802 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, MEMORY is used. */
5804 if (class1 == X86_64_X87_CLASS
5805 || class1 == X86_64_X87UP_CLASS
5806 || class1 == X86_64_COMPLEX_X87_CLASS
5807 || class2 == X86_64_X87_CLASS
5808 || class2 == X86_64_X87UP_CLASS
5809 || class2 == X86_64_COMPLEX_X87_CLASS)
5810 return X86_64_MEMORY_CLASS;
5812 /* Rule #6: Otherwise class SSE is used. */
5813 return X86_64_SSE_CLASS;
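/* A worked example of the rules above (illustrative only): merging
   X86_64_SSEDF_CLASS with X86_64_INTEGERSI_CLASS falls through to
   rule #4 and yields X86_64_INTEGER_CLASS, so a union such as

     union u { double d; int i; };

   has its single eightbyte passed in a general purpose register.  */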
5816 /* Classify the argument of type TYPE and mode MODE.
5817 CLASSES will be filled by the register class used to pass each word
5818 of the operand. The number of words is returned. In case the parameter
5819 should be passed in memory, 0 is returned. As a special case for zero
5820 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5822 BIT_OFFSET is used internally for handling records; it specifies the
5823 offset, in bits modulo 256, to avoid overflow cases.
5825 See the x86-64 PS ABI for details.
5829 classify_argument (enum machine_mode mode, const_tree type,
5830 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5832 HOST_WIDE_INT bytes =
5833 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5834 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5836 /* Variable sized entities are always passed/returned in memory. */
5840 if (mode != VOIDmode
5841 && targetm.calls.must_pass_in_stack (mode, type))
5844 if (type && AGGREGATE_TYPE_P (type))
5848 enum x86_64_reg_class subclasses[MAX_CLASSES];
5850 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5854 for (i = 0; i < words; i++)
5855 classes[i] = X86_64_NO_CLASS;
5857 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5858 signal the memory class, so handle it as a special case. */
5861 classes[0] = X86_64_NO_CLASS;
5865 /* Classify each field of record and merge classes. */
5866 switch (TREE_CODE (type))
5869 /* And now merge the fields of structure. */
5870 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5872 if (TREE_CODE (field) == FIELD_DECL)
5876 if (TREE_TYPE (field) == error_mark_node)
5879 /* Bitfields are always classified as integer. Handle them
5880 early, since later code would consider them to be
5881 misaligned integers. */
5882 if (DECL_BIT_FIELD (field))
5884 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5885 i < ((int_bit_position (field) + (bit_offset % 64))
5886 + tree_low_cst (DECL_SIZE (field), 0)
5889 merge_classes (X86_64_INTEGER_CLASS,
5896 type = TREE_TYPE (field);
5898 /* Flexible array member is ignored. */
5899 if (TYPE_MODE (type) == BLKmode
5900 && TREE_CODE (type) == ARRAY_TYPE
5901 && TYPE_SIZE (type) == NULL_TREE
5902 && TYPE_DOMAIN (type) != NULL_TREE
5903 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5908 if (!warned && warn_psabi)
5911 inform (input_location,
5912 "the ABI of passing struct with"
5913 " a flexible array member has"
5914 " changed in GCC 4.4");
5918 num = classify_argument (TYPE_MODE (type), type,
5920 (int_bit_position (field)
5921 + bit_offset) % 256);
5924 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5925 for (i = 0; i < num && (i + pos) < words; i++)
5927 merge_classes (subclasses[i], classes[i + pos]);
5934 /* Arrays are handled as small records. */
5937 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5938 TREE_TYPE (type), subclasses, bit_offset);
5942 /* The partial classes are now full classes. */
5943 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5944 subclasses[0] = X86_64_SSE_CLASS;
5945 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5946 && !((bit_offset % 64) == 0 && bytes == 4))
5947 subclasses[0] = X86_64_INTEGER_CLASS;
5949 for (i = 0; i < words; i++)
5950 classes[i] = subclasses[i % num];
5955 case QUAL_UNION_TYPE:
5956 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5958 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5960 if (TREE_CODE (field) == FIELD_DECL)
5964 if (TREE_TYPE (field) == error_mark_node)
5967 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5968 TREE_TYPE (field), subclasses,
5972 for (i = 0; i < num; i++)
5973 classes[i] = merge_classes (subclasses[i], classes[i]);
5984 /* When size > 16 bytes, if the first one isn't
5985 X86_64_SSE_CLASS or any of the others isn't
5986 X86_64_SSEUP_CLASS, everything should be passed in memory. */
5988 if (classes[0] != X86_64_SSE_CLASS)
5991 for (i = 1; i < words; i++)
5992 if (classes[i] != X86_64_SSEUP_CLASS)
5996 /* Final merger cleanup. */
5997 for (i = 0; i < words; i++)
5999 /* If one class is MEMORY, everything should be passed in memory. */
6001 if (classes[i] == X86_64_MEMORY_CLASS)
6004 /* The X86_64_SSEUP_CLASS should always be preceded by
6005 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6006 if (classes[i] == X86_64_SSEUP_CLASS
6007 && classes[i - 1] != X86_64_SSE_CLASS
6008 && classes[i - 1] != X86_64_SSEUP_CLASS)
6010 /* The first one should never be X86_64_SSEUP_CLASS. */
6011 gcc_assert (i != 0);
6012 classes[i] = X86_64_SSE_CLASS;
6015 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6016 everything should be passed in memory. */
6017 if (classes[i] == X86_64_X87UP_CLASS
6018 && (classes[i - 1] != X86_64_X87_CLASS))
6022 /* The first one should never be X86_64_X87UP_CLASS. */
6023 gcc_assert (i != 0);
6024 if (!warned && warn_psabi)
6027 inform (input_location,
6028 "the ABI of passing union with long double"
6029 " has changed in GCC 4.4");
6037 /* Compute the alignment needed. We align all types to natural boundaries, with
6038 the exception of XFmode, which is aligned to 64bits. */
6039 if (mode != VOIDmode && mode != BLKmode)
6041 int mode_alignment = GET_MODE_BITSIZE (mode);
6044 mode_alignment = 128;
6045 else if (mode == XCmode)
6046 mode_alignment = 256;
6047 if (COMPLEX_MODE_P (mode))
6048 mode_alignment /= 2;
6049 /* Misaligned fields are always returned in memory. */
6050 if (bit_offset % mode_alignment)
6054 /* For V1xx modes, just use the base mode. */
6055 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6056 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6057 mode = GET_MODE_INNER (mode);
6059 /* Classification of atomic types. */
6064 classes[0] = X86_64_SSE_CLASS;
6067 classes[0] = X86_64_SSE_CLASS;
6068 classes[1] = X86_64_SSEUP_CLASS;
6078 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6082 classes[0] = X86_64_INTEGERSI_CLASS;
6085 else if (size <= 64)
6087 classes[0] = X86_64_INTEGER_CLASS;
6090 else if (size <= 64+32)
6092 classes[0] = X86_64_INTEGER_CLASS;
6093 classes[1] = X86_64_INTEGERSI_CLASS;
6096 else if (size <= 64+64)
6098 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6106 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6110 /* OImode shouldn't be used directly. */
6115 if (!(bit_offset % 64))
6116 classes[0] = X86_64_SSESF_CLASS;
6118 classes[0] = X86_64_SSE_CLASS;
6121 classes[0] = X86_64_SSEDF_CLASS;
6124 classes[0] = X86_64_X87_CLASS;
6125 classes[1] = X86_64_X87UP_CLASS;
6128 classes[0] = X86_64_SSE_CLASS;
6129 classes[1] = X86_64_SSEUP_CLASS;
6132 classes[0] = X86_64_SSE_CLASS;
6133 if (!(bit_offset % 64))
6139 if (!warned && warn_psabi)
6142 inform (input_location,
6143 "the ABI of passing structure with complex float"
6144 " member has changed in GCC 4.4");
6146 classes[1] = X86_64_SSESF_CLASS;
6150 classes[0] = X86_64_SSEDF_CLASS;
6151 classes[1] = X86_64_SSEDF_CLASS;
6154 classes[0] = X86_64_COMPLEX_X87_CLASS;
6157 /* This mode is larger than 16 bytes. */
6165 classes[0] = X86_64_SSE_CLASS;
6166 classes[1] = X86_64_SSEUP_CLASS;
6167 classes[2] = X86_64_SSEUP_CLASS;
6168 classes[3] = X86_64_SSEUP_CLASS;
6176 classes[0] = X86_64_SSE_CLASS;
6177 classes[1] = X86_64_SSEUP_CLASS;
6185 classes[0] = X86_64_SSE_CLASS;
6191 gcc_assert (VECTOR_MODE_P (mode));
6196 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6198 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6199 classes[0] = X86_64_INTEGERSI_CLASS;
6201 classes[0] = X86_64_INTEGER_CLASS;
6202 classes[1] = X86_64_INTEGER_CLASS;
6203 return 1 + (bytes > 8);
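/* A worked example of the classification above (illustrative only):

     struct s { double d; int i; };

   occupies two eightbytes; the first (the double) classifies as
   X86_64_SSEDF_CLASS and the second (the int) as
   X86_64_INTEGERSI_CLASS, so the struct travels in one SSE and one
   integer register.  */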
6207 /* Examine the argument and set the number of registers required in each
6208 class. Return 0 iff the parameter should be passed in memory. */
6210 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6211 int *int_nregs, int *sse_nregs)
6213 enum x86_64_reg_class regclass[MAX_CLASSES];
6214 int n = classify_argument (mode, type, regclass, 0);
6220 for (n--; n >= 0; n--)
6221 switch (regclass[n])
6223 case X86_64_INTEGER_CLASS:
6224 case X86_64_INTEGERSI_CLASS:
6227 case X86_64_SSE_CLASS:
6228 case X86_64_SSESF_CLASS:
6229 case X86_64_SSEDF_CLASS:
6232 case X86_64_NO_CLASS:
6233 case X86_64_SSEUP_CLASS:
6235 case X86_64_X87_CLASS:
6236 case X86_64_X87UP_CLASS:
6240 case X86_64_COMPLEX_X87_CLASS:
6241 return in_return ? 2 : 0;
6242 case X86_64_MEMORY_CLASS:
6248 /* Construct container for the argument used by GCC interface. See
6249 FUNCTION_ARG for the detailed description. */
6252 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6253 const_tree type, int in_return, int nintregs, int nsseregs,
6254 const int *intreg, int sse_regno)
6256 /* The following variables hold the static issued_error state. */
6257 static bool issued_sse_arg_error;
6258 static bool issued_sse_ret_error;
6259 static bool issued_x87_ret_error;
6261 enum machine_mode tmpmode;
6262 HOST_WIDE_INT bytes =
6263 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6264 enum x86_64_reg_class regclass[MAX_CLASSES];
6268 int needed_sseregs, needed_intregs;
6269 rtx exp[MAX_CLASSES];
6272 n = classify_argument (mode, type, regclass, 0);
6275 if (!examine_argument (mode, type, in_return, &needed_intregs,
6278 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6281 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6282 some less clueful developer tries to use floating-point anyway. */
6283 if (needed_sseregs && !TARGET_SSE)
6287 if (!issued_sse_ret_error)
6289 error ("SSE register return with SSE disabled");
6290 issued_sse_ret_error = true;
6293 else if (!issued_sse_arg_error)
6295 error ("SSE register argument with SSE disabled");
6296 issued_sse_arg_error = true;
6301 /* Likewise, error if the ABI requires us to return values in the
6302 x87 registers and the user specified -mno-80387. */
6303 if (!TARGET_80387 && in_return)
6304 for (i = 0; i < n; i++)
6305 if (regclass[i] == X86_64_X87_CLASS
6306 || regclass[i] == X86_64_X87UP_CLASS
6307 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6309 if (!issued_x87_ret_error)
6311 error ("x87 register return with x87 disabled");
6312 issued_x87_ret_error = true;
6317 /* First construct simple cases. Avoid SCmode, since we want to use a
6318 single register to pass this type. */
6319 if (n == 1 && mode != SCmode)
6320 switch (regclass[0])
6322 case X86_64_INTEGER_CLASS:
6323 case X86_64_INTEGERSI_CLASS:
6324 return gen_rtx_REG (mode, intreg[0]);
6325 case X86_64_SSE_CLASS:
6326 case X86_64_SSESF_CLASS:
6327 case X86_64_SSEDF_CLASS:
6328 if (mode != BLKmode)
6329 return gen_reg_or_parallel (mode, orig_mode,
6330 SSE_REGNO (sse_regno));
6332 case X86_64_X87_CLASS:
6333 case X86_64_COMPLEX_X87_CLASS:
6334 return gen_rtx_REG (mode, FIRST_STACK_REG);
6335 case X86_64_NO_CLASS:
6336 /* Zero sized array, struct or class. */
6341 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6342 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6343 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6345 && regclass[0] == X86_64_SSE_CLASS
6346 && regclass[1] == X86_64_SSEUP_CLASS
6347 && regclass[2] == X86_64_SSEUP_CLASS
6348 && regclass[3] == X86_64_SSEUP_CLASS
6350 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6353 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6354 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6355 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6356 && regclass[1] == X86_64_INTEGER_CLASS
6357 && (mode == CDImode || mode == TImode || mode == TFmode)
6358 && intreg[0] + 1 == intreg[1])
6359 return gen_rtx_REG (mode, intreg[0]);
6361 /* Otherwise figure out the entries of the PARALLEL. */
6362 for (i = 0; i < n; i++)
6366 switch (regclass[i])
6368 case X86_64_NO_CLASS:
6370 case X86_64_INTEGER_CLASS:
6371 case X86_64_INTEGERSI_CLASS:
6372 /* Merge TImodes on aligned occasions here too. */
6373 if (i * 8 + 8 > bytes)
6374 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6375 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6379 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
6380 if (tmpmode == BLKmode)
6382 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6383 gen_rtx_REG (tmpmode, *intreg),
6387 case X86_64_SSESF_CLASS:
6388 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6389 gen_rtx_REG (SFmode,
6390 SSE_REGNO (sse_regno)),
6394 case X86_64_SSEDF_CLASS:
6395 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6396 gen_rtx_REG (DFmode,
6397 SSE_REGNO (sse_regno)),
6401 case X86_64_SSE_CLASS:
6409 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6419 && regclass[1] == X86_64_SSEUP_CLASS
6420 && regclass[2] == X86_64_SSEUP_CLASS
6421 && regclass[3] == X86_64_SSEUP_CLASS);
6428 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6429 gen_rtx_REG (tmpmode,
6430 SSE_REGNO (sse_regno)),
6439 /* Empty aligned struct, union or class. */
6443 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6444 for (i = 0; i < nexps; i++)
6445 XVECEXP (ret, 0, i) = exp [i];
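/* For instance (a sketch, not traced from a real compilation): for
   struct s { double d; long l; } the PARALLEL built above holds two
   EXPR_LISTs, roughly

     (expr_list (reg:DF xmm0) (const_int 0))
     (expr_list (reg:DI di) (const_int 8))

   i.e. the double at byte offset 0 in an SSE register and the long at
   byte offset 8 in an integer register.  */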
6449 /* Update the data in CUM to advance over an argument of mode MODE
6450 and data type TYPE. (TYPE is null for libcalls where that information
6451 may not be available.) */
6454 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6455 const_tree type, HOST_WIDE_INT bytes,
6456 HOST_WIDE_INT words)
6472 cum->words += words;
6473 cum->nregs -= words;
6474 cum->regno += words;
6476 if (cum->nregs <= 0)
6484 /* OImode shouldn't be used directly. */
6488 if (cum->float_in_sse < 2)
6491 if (cum->float_in_sse < 1)
6508 if (!type || !AGGREGATE_TYPE_P (type))
6510 cum->sse_words += words;
6511 cum->sse_nregs -= 1;
6512 cum->sse_regno += 1;
6513 if (cum->sse_nregs <= 0)
6527 if (!type || !AGGREGATE_TYPE_P (type))
6529 cum->mmx_words += words;
6530 cum->mmx_nregs -= 1;
6531 cum->mmx_regno += 1;
6532 if (cum->mmx_nregs <= 0)
6543 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6544 const_tree type, HOST_WIDE_INT words, bool named)
6546 int int_nregs, sse_nregs;
6548 /* Unnamed 256bit vector mode parameters are passed on stack. */
6549 if (!named && VALID_AVX256_REG_MODE (mode))
6552 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6553 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6555 cum->nregs -= int_nregs;
6556 cum->sse_nregs -= sse_nregs;
6557 cum->regno += int_nregs;
6558 cum->sse_regno += sse_nregs;
6562 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6563 cum->words = (cum->words + align - 1) & ~(align - 1);
6564 cum->words += words;
6569 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6570 HOST_WIDE_INT words)
6572 /* Otherwise, this should be passed indirectly. */
6573 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6575 cum->words += words;
6583 /* Update the data in CUM to advance over an argument of mode MODE and
6584 data type TYPE. (TYPE is null for libcalls where that information
6585 may not be available.) */
6588 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6589 const_tree type, bool named)
6591 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6592 HOST_WIDE_INT bytes, words;
6594 if (mode == BLKmode)
6595 bytes = int_size_in_bytes (type);
6597 bytes = GET_MODE_SIZE (mode);
6598 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6601 mode = type_natural_mode (type, NULL);
6603 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6604 function_arg_advance_ms_64 (cum, bytes, words);
6605 else if (TARGET_64BIT)
6606 function_arg_advance_64 (cum, mode, type, words, named);
6608 function_arg_advance_32 (cum, mode, type, bytes, words);
6611 /* Define where to put the arguments to a function.
6612 Value is zero to push the argument on the stack,
6613 or a hard register in which to store the argument.
6615 MODE is the argument's machine mode.
6616 TYPE is the data type of the argument (as a tree).
6617 This is null for libcalls where that information may
6619 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6620 the preceding args and about the function being called.
6621 NAMED is nonzero if this argument is a named parameter
6622 (otherwise it is an extra parameter matching an ellipsis). */
6625 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6626 enum machine_mode orig_mode, const_tree type,
6627 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6629 static bool warnedsse, warnedmmx;
6631 /* Avoid the AL settings for the Unix64 ABI. */
6632 if (mode == VOIDmode)
6648 if (words <= cum->nregs)
6650 int regno = cum->regno;
6652 /* Fastcall allocates the first two DWORD (SImode) or
6653 smaller arguments to ECX and EDX if it isn't an aggregate type. */
6659 || (type && AGGREGATE_TYPE_P (type)))
6662 /* ECX, not EAX, is the first allocated register. */
6663 if (regno == AX_REG)
6666 return gen_rtx_REG (mode, regno);
6671 if (cum->float_in_sse < 2)
6674 if (cum->float_in_sse < 1)
6678 /* In 32bit, we pass TImode in xmm registers. */
6685 if (!type || !AGGREGATE_TYPE_P (type))
6687 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6690 warning (0, "SSE vector argument without SSE enabled "
6694 return gen_reg_or_parallel (mode, orig_mode,
6695 cum->sse_regno + FIRST_SSE_REG);
6700 /* OImode shouldn't be used directly. */
6709 if (!type || !AGGREGATE_TYPE_P (type))
6712 return gen_reg_or_parallel (mode, orig_mode,
6713 cum->sse_regno + FIRST_SSE_REG);
6723 if (!type || !AGGREGATE_TYPE_P (type))
6725 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6728 warning (0, "MMX vector argument without MMX enabled "
6732 return gen_reg_or_parallel (mode, orig_mode,
6733 cum->mmx_regno + FIRST_MMX_REG);
6742 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6743 enum machine_mode orig_mode, const_tree type, bool named)
6745 /* Handle a hidden AL argument containing the number of registers
6746 for varargs x86-64 functions. */
6747 if (mode == VOIDmode)
6748 return GEN_INT (cum->maybe_vaarg
6749 ? (cum->sse_nregs < 0
6750 ? X86_64_SSE_REGPARM_MAX
6765 /* Unnamed 256bit vector mode parameters are passed on stack. */
6771 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6773 &x86_64_int_parameter_registers [cum->regno],
6778 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6779 enum machine_mode orig_mode, bool named,
6780 HOST_WIDE_INT bytes)
6784 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6785 We use the value -2 to specify that the current function call is MS ABI. */
6786 if (mode == VOIDmode)
6787 return GEN_INT (-2);
6789 /* If we've run out of registers, it goes on the stack. */
6790 if (cum->nregs == 0)
6793 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6795 /* Only floating point modes are passed in anything but integer regs. */
6796 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6799 regno = cum->regno + FIRST_SSE_REG;
6804 /* Unnamed floating parameters are passed in both the
6805 SSE and integer registers. */
6806 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6807 t2 = gen_rtx_REG (mode, regno);
6808 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6809 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6810 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
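/* For example (illustrative): an unnamed double passed as the second
   argument of an MS-ABI varargs function is described by a PARALLEL
   of %xmm1 and %rdx, so the callee can find it in either the SSE or
   the integer register slot.  */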
6813 /* Handle aggregate types passed in registers. */
6814 if (orig_mode == BLKmode)
6816 if (bytes > 0 && bytes <= 8)
6817 mode = (bytes > 4 ? DImode : SImode);
6818 if (mode == BLKmode)
6822 return gen_reg_or_parallel (mode, orig_mode, regno);
6825 /* Return where to put the arguments to a function.
6826 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6828 MODE is the argument's machine mode. TYPE is the data type of the
6829 argument. It is null for libcalls where that information may not be
6830 available. CUM gives information about the preceding args and about
6831 the function being called. NAMED is nonzero if this argument is a
6832 named parameter (otherwise it is an extra parameter matching an
6836 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6837 const_tree type, bool named)
6839 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6840 enum machine_mode mode = omode;
6841 HOST_WIDE_INT bytes, words;
6844 if (mode == BLKmode)
6845 bytes = int_size_in_bytes (type);
6847 bytes = GET_MODE_SIZE (mode);
6848 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6850 /* To simplify the code below, represent vector types with a vector mode
6851 even if MMX/SSE are not active. */
6852 if (type && TREE_CODE (type) == VECTOR_TYPE)
6853 mode = type_natural_mode (type, cum);
6855 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6856 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6857 else if (TARGET_64BIT)
6858 arg = function_arg_64 (cum, mode, omode, type, named);
6860 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6862 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6864 /* This argument uses 256bit AVX modes. */
6866 cfun->machine->callee_pass_avx256_p = true;
6868 cfun->machine->caller_pass_avx256_p = true;
6874 /* A C expression that indicates when an argument must be passed by
6875 reference. If nonzero for an argument, a copy of that argument is
6876 made in memory and a pointer to the argument is passed instead of
6877 the argument itself. The pointer is passed in whatever way is
6878 appropriate for passing a pointer to that type. */
6881 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6882 enum machine_mode mode ATTRIBUTE_UNUSED,
6883 const_tree type, bool named ATTRIBUTE_UNUSED)
6885 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6887 /* See Windows x64 Software Convention. */
6888 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6890 int msize = (int) GET_MODE_SIZE (mode);
6893 /* Arrays are passed by reference. */
6894 if (TREE_CODE (type) == ARRAY_TYPE)
6897 if (AGGREGATE_TYPE_P (type))
6899 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6900 are passed by reference. */
6901 msize = int_size_in_bytes (type);
6905 /* __m128 is passed by reference. */
6907 case 1: case 2: case 4: case 8:
6913 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6919 /* Return true when TYPE should be 128bit aligned for 32bit argument
6920 passing ABI. XXX: This function is obsolete and is only used for
6921 checking psABI compatibility with previous versions of GCC. */
6924 ix86_compat_aligned_value_p (const_tree type)
6926 enum machine_mode mode = TYPE_MODE (type);
6927 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6931 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6933 if (TYPE_ALIGN (type) < 128)
6936 if (AGGREGATE_TYPE_P (type))
6938 /* Walk the aggregates recursively. */
6939 switch (TREE_CODE (type))
6943 case QUAL_UNION_TYPE:
6947 /* Walk all the structure fields. */
6948 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6950 if (TREE_CODE (field) == FIELD_DECL
6951 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6958 /* Just in case some language passes arrays by value. */
6959 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6970 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6971 XXX: This function is obsolete and is only used for checking psABI
6972 compatibility with previous versions of GCC. */
6975 ix86_compat_function_arg_boundary (enum machine_mode mode,
6976 const_tree type, unsigned int align)
6978 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6979 natural boundaries. */
6980 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6982 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6983 make an exception for SSE modes since these require 128bit alignment.
6986 The handling here differs from field_alignment. ICC aligns MMX
6987 arguments to 4 byte boundaries, while structure fields are aligned
6988 to 8 byte boundaries. */
6991 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6992 align = PARM_BOUNDARY;
6996 if (!ix86_compat_aligned_value_p (type))
6997 align = PARM_BOUNDARY;
7000 if (align > BIGGEST_ALIGNMENT)
7001 align = BIGGEST_ALIGNMENT;
7005 /* Return true when TYPE should be 128bit aligned for 32bit argument passing ABI. */
7009 ix86_contains_aligned_value_p (const_tree type)
7011 enum machine_mode mode = TYPE_MODE (type);
7013 if (mode == XFmode || mode == XCmode)
7016 if (TYPE_ALIGN (type) < 128)
7019 if (AGGREGATE_TYPE_P (type))
7021 /* Walk the aggregates recursively. */
7022 switch (TREE_CODE (type))
7026 case QUAL_UNION_TYPE:
7030 /* Walk all the structure fields. */
7031 for (field = TYPE_FIELDS (type);
7033 field = DECL_CHAIN (field))
7035 if (TREE_CODE (field) == FIELD_DECL
7036 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7043 /* Just in case some language passes arrays by value. */
7044 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7053 return TYPE_ALIGN (type) >= 128;
7058 /* Gives the alignment boundary, in bits, of an argument with the
7059 specified mode and type. */
7062 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7067 /* Since the main variant type is used for the call, convert TYPE
7068 to its main variant. */
7069 type = TYPE_MAIN_VARIANT (type);
7070 align = TYPE_ALIGN (type);
7073 align = GET_MODE_ALIGNMENT (mode);
7074 if (align < PARM_BOUNDARY)
7075 align = PARM_BOUNDARY;
7079 unsigned int saved_align = align;
7083 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7086 if (mode == XFmode || mode == XCmode)
7087 align = PARM_BOUNDARY;
7089 else if (!ix86_contains_aligned_value_p (type))
7090 align = PARM_BOUNDARY;
7093 align = PARM_BOUNDARY;
7098 && align != ix86_compat_function_arg_boundary (mode, type,
7102 inform (input_location,
7103 "The ABI for passing parameters with %d-byte"
7104 " alignment has changed in GCC 4.6",
7105 align / BITS_PER_UNIT);
7112 /* Return true if N is a possible register number of function value. */
7115 ix86_function_value_regno_p (const unsigned int regno)
7122 case FIRST_FLOAT_REG:
7123 /* TODO: The function should depend on the current function ABI, but
7124 builtins.c would need updating then. Therefore we use the default ABI. */
7126 if (TARGET_64BIT && ix86_abi == MS_ABI)
7128 return TARGET_FLOAT_RETURNS_IN_80387;
7134 if (TARGET_MACHO || TARGET_64BIT)
7142 /* Define how to find the value returned by a function.
7143 VALTYPE is the data type of the value (as a tree).
7144 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7145 otherwise, FUNC is 0. */
7148 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7149 const_tree fntype, const_tree fn)
7153 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7154 we normally prevent this case when mmx is not available. However
7155 some ABIs may require the result to be returned like DImode. */
7156 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7157 regno = FIRST_MMX_REG;
7159 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7160 we prevent this case when sse is not available. However some ABIs
7161 may require the result to be returned like integer TImode. */
7162 else if (mode == TImode
7163 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7164 regno = FIRST_SSE_REG;
7166 /* 32-byte vector modes in %ymm0. */
7167 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7168 regno = FIRST_SSE_REG;
7170 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7171 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7172 regno = FIRST_FLOAT_REG;
7174 /* Most things go in %eax. */
7177 /* Override FP return register with %xmm0 for local functions when
7178 SSE math is enabled or for functions with sseregparm attribute. */
7179 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7181 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7182 if ((sse_level >= 1 && mode == SFmode)
7183 || (sse_level == 2 && mode == DFmode))
7184 regno = FIRST_SSE_REG;
7187 /* OImode shouldn't be used directly. */
7188 gcc_assert (mode != OImode);
7190 return gen_rtx_REG (orig_mode, regno);
7194 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7199 /* Handle libcalls, which don't provide a type node. */
7200 if (valtype == NULL)
7214 regno = FIRST_SSE_REG;
7218 regno = FIRST_FLOAT_REG;
7226 return gen_rtx_REG (mode, regno);
7228 else if (POINTER_TYPE_P (valtype))
7230 /* Pointers are always returned in Pmode. */
7234 ret = construct_container (mode, orig_mode, valtype, 1,
7235 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7236 x86_64_int_return_registers, 0);
7238 /* For zero sized structures, construct_container returns NULL, but we
7239 need to keep the rest of the compiler happy by returning a meaningful value. */
7241 ret = gen_rtx_REG (orig_mode, AX_REG);
7247 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7249 unsigned int regno = AX_REG;
7253 switch (GET_MODE_SIZE (mode))
7256 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7257 && !COMPLEX_MODE_P (mode))
7258 regno = FIRST_SSE_REG;
7262 if (mode == SFmode || mode == DFmode)
7263 regno = FIRST_SSE_REG;
7269 return gen_rtx_REG (orig_mode, regno);
7273 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7274 enum machine_mode orig_mode, enum machine_mode mode)
7276 const_tree fn, fntype;
7279 if (fntype_or_decl && DECL_P (fntype_or_decl))
7280 fn = fntype_or_decl;
7281 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7283 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7284 return function_value_ms_64 (orig_mode, mode);
7285 else if (TARGET_64BIT)
7286 return function_value_64 (orig_mode, mode, valtype);
7288 return function_value_32 (orig_mode, mode, fntype, fn);
7292 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7293 bool outgoing ATTRIBUTE_UNUSED)
7295 enum machine_mode mode, orig_mode;
7297 orig_mode = TYPE_MODE (valtype);
7298 mode = type_natural_mode (valtype, NULL);
7299 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7302 /* Pointer function arguments and return values are promoted to Pmode. */
7304 static enum machine_mode
7305 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7306 int *punsignedp, const_tree fntype,
7309 if (type != NULL_TREE && POINTER_TYPE_P (type))
7311 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7314 return default_promote_function_mode (type, mode, punsignedp, fntype,
7319 ix86_libcall_value (enum machine_mode mode)
7321 return ix86_function_value_1 (NULL, NULL, mode, mode);
7324 /* Return true iff type is returned in memory. */
7326 static bool ATTRIBUTE_UNUSED
7327 return_in_memory_32 (const_tree type, enum machine_mode mode)
7331 if (mode == BLKmode)
7334 size = int_size_in_bytes (type);
7336 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7339 if (VECTOR_MODE_P (mode) || mode == TImode)
7341 /* User-created vectors small enough to fit in EAX. */
7345 /* MMX/3dNow values are returned in MM0,
7346 except when it doesn't exist or the ABI prescribes otherwise. */
7348 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7350 /* SSE values are returned in XMM0, except when it doesn't exist. */
7354 /* AVX values are returned in YMM0, except when it doesn't exist. */
7365 /* OImode shouldn't be used directly. */
7366 gcc_assert (mode != OImode);
7371 static bool ATTRIBUTE_UNUSED
7372 return_in_memory_64 (const_tree type, enum machine_mode mode)
7374 int needed_intregs, needed_sseregs;
7375 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7378 static bool ATTRIBUTE_UNUSED
7379 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7381 HOST_WIDE_INT size = int_size_in_bytes (type);
7383 /* __m128 is returned in xmm0. */
7384 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7385 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7388 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
7389 return size != 1 && size != 2 && size != 4 && size != 8;
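/* For example, under this rule a 12-byte struct is returned in memory
   via a hidden pointer, while an 8-byte struct comes back in %rax.  */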
7393 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7395 #ifdef SUBTARGET_RETURN_IN_MEMORY
7396 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7398 const enum machine_mode mode = type_natural_mode (type, NULL);
7402 if (ix86_function_type_abi (fntype) == MS_ABI)
7403 return return_in_memory_ms_64 (type, mode);
7405 return return_in_memory_64 (type, mode);
7408 return return_in_memory_32 (type, mode);
7412 /* When returning SSE vector types, we have a choice of either
7413 (1) being ABI incompatible with a -march switch, or
7414 (2) generating an error.
7415 Given no good solution, I think the safest thing is one warning.
7416 The user won't be able to use -Werror, but....
7418 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7419 called in response to actually generating a caller or callee that
7420 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7421 via aggregate_value_p for general type probing from tree-ssa. */
7424 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7426 static bool warnedsse, warnedmmx;
7428 if (!TARGET_64BIT && type)
7430 /* Look at the return type of the function, not the function type. */
7431 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7433 if (!TARGET_SSE && !warnedsse)
7436 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7439 warning (0, "SSE vector return without SSE enabled "
7444 if (!TARGET_MMX && !warnedmmx)
7446 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7449 warning (0, "MMX vector return without MMX enabled "
7459 /* Create the va_list data type. */
7461 /* Returns the calling-convention-specific va_list data type.
7462 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7465 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7467 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7469 /* For i386 we use plain pointer to argument area. */
7470 if (!TARGET_64BIT || abi == MS_ABI)
7471 return build_pointer_type (char_type_node);
7473 record = lang_hooks.types.make_type (RECORD_TYPE);
7474 type_decl = build_decl (BUILTINS_LOCATION,
7475 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7477 f_gpr = build_decl (BUILTINS_LOCATION,
7478 FIELD_DECL, get_identifier ("gp_offset"),
7479 unsigned_type_node);
7480 f_fpr = build_decl (BUILTINS_LOCATION,
7481 FIELD_DECL, get_identifier ("fp_offset"),
7482 unsigned_type_node);
7483 f_ovf = build_decl (BUILTINS_LOCATION,
7484 FIELD_DECL, get_identifier ("overflow_arg_area"),
7486 f_sav = build_decl (BUILTINS_LOCATION,
7487 FIELD_DECL, get_identifier ("reg_save_area"),
7490 va_list_gpr_counter_field = f_gpr;
7491 va_list_fpr_counter_field = f_fpr;
7493 DECL_FIELD_CONTEXT (f_gpr) = record;
7494 DECL_FIELD_CONTEXT (f_fpr) = record;
7495 DECL_FIELD_CONTEXT (f_ovf) = record;
7496 DECL_FIELD_CONTEXT (f_sav) = record;
7498 TYPE_STUB_DECL (record) = type_decl;
7499 TYPE_NAME (record) = type_decl;
7500 TYPE_FIELDS (record) = f_gpr;
7501 DECL_CHAIN (f_gpr) = f_fpr;
7502 DECL_CHAIN (f_fpr) = f_ovf;
7503 DECL_CHAIN (f_ovf) = f_sav;
7505 layout_type (record);
7507 /* The correct type is an array type of one element. */
7508 return build_array_type (record, build_index_type (size_zero_node));
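/* The record built above corresponds to the well-known SYSV x86-64
   va_list layout; in C terms it is roughly (shown for illustration
   only, this typedef is not emitted anywhere):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */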
7511 /* Set up the builtin va_list data type and, for 64-bit, the additional
7512 calling-convention-specific va_list data types. */
7515 ix86_build_builtin_va_list (void)
7517 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7519 /* Initialize ABI-specific va_list builtin types. */
7523 if (ix86_abi == MS_ABI)
7525 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7526 if (TREE_CODE (t) != RECORD_TYPE)
7527 t = build_variant_type_copy (t);
7528 sysv_va_list_type_node = t;
7533 if (TREE_CODE (t) != RECORD_TYPE)
7534 t = build_variant_type_copy (t);
7535 sysv_va_list_type_node = t;
7537 if (ix86_abi != MS_ABI)
7539 t = ix86_build_builtin_va_list_abi (MS_ABI);
7540 if (TREE_CODE (t) != RECORD_TYPE)
7541 t = build_variant_type_copy (t);
7542 ms_va_list_type_node = t;
7547 if (TREE_CODE (t) != RECORD_TYPE)
7548 t = build_variant_type_copy (t);
7549 ms_va_list_type_node = t;
7556 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7559 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7565 /* GPR size of varargs save area. */
7566 if (cfun->va_list_gpr_size)
7567 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7569 ix86_varargs_gpr_size = 0;
7571 /* FPR size of varargs save area. We don't need it if we don't pass
7572 anything in SSE registers. */
7573 if (TARGET_SSE && cfun->va_list_fpr_size)
7574 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7576 ix86_varargs_fpr_size = 0;
7578 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7581 save_area = frame_pointer_rtx;
7582 set = get_varargs_alias_set ();
7584 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7585 if (max > X86_64_REGPARM_MAX)
7586 max = X86_64_REGPARM_MAX;
7588 for (i = cum->regno; i < max; i++)
7590 mem = gen_rtx_MEM (Pmode,
7591 plus_constant (save_area, i * UNITS_PER_WORD));
7592 MEM_NOTRAP_P (mem) = 1;
7593 set_mem_alias_set (mem, set);
7594 emit_move_insn (mem, gen_rtx_REG (Pmode,
7595 x86_64_int_parameter_registers[i]));
7598 if (ix86_varargs_fpr_size)
7600 enum machine_mode smode;
7603 /* Now emit code to save SSE registers. The AX parameter contains number
7604 of SSE parameter registers used to call this function, though all we
7605 actually check here is the zero/non-zero status. */
7607 label = gen_label_rtx ();
7608 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7609 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7612 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7613 we used movdqa (i.e. TImode) instead? Perhaps even better would
7614 be if we could determine the real mode of the data, via a hook
7615 into pass_stdarg. Ignore all that for now. */
7617 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7618 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7620 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7621 if (max > X86_64_SSE_REGPARM_MAX)
7622 max = X86_64_SSE_REGPARM_MAX;
7624 for (i = cum->sse_regno; i < max; ++i)
7626 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7627 mem = gen_rtx_MEM (smode, mem);
7628 MEM_NOTRAP_P (mem) = 1;
7629 set_mem_alias_set (mem, set);
7630 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7632 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7640 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7642 alias_set_type set = get_varargs_alias_set ();
7645 /* Reset to zero, as there might be a sysv vaarg used before. */
7647 ix86_varargs_gpr_size = 0;
7648 ix86_varargs_fpr_size = 0;
7650 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7654 mem = gen_rtx_MEM (Pmode,
7655 plus_constant (virtual_incoming_args_rtx,
7656 i * UNITS_PER_WORD));
7657 MEM_NOTRAP_P (mem) = 1;
7658 set_mem_alias_set (mem, set);
7660 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7661 emit_move_insn (mem, reg);
7666 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7667 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7670 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7671 CUMULATIVE_ARGS next_cum;
7674 /* This argument doesn't appear to be used any more, which is good,
7675 because the old code here didn't suppress rtl generation. */
7676 gcc_assert (!no_rtl);
7681 fntype = TREE_TYPE (current_function_decl);
7683 /* For varargs, we do not want to skip the dummy va_dcl argument.
7684 For stdargs, we do want to skip the last named argument. */
7686 if (stdarg_p (fntype))
7687 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7690 if (cum->call_abi == MS_ABI)
7691 setup_incoming_varargs_ms_64 (&next_cum);
7693 setup_incoming_varargs_64 (&next_cum);
7696 /* Check if TYPE is a va_list of kind char *. */
7699 is_va_list_char_pointer (tree type)
7703 /* For 32-bit it is always true. */
7706 canonic = ix86_canonical_va_list_type (type);
7707 return (canonic == ms_va_list_type_node
7708 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7711 /* Implement va_start. */
7714 ix86_va_start (tree valist, rtx nextarg)
7716 HOST_WIDE_INT words, n_gpr, n_fpr;
7717 tree f_gpr, f_fpr, f_ovf, f_sav;
7718 tree gpr, fpr, ovf, sav, t;
7722 if (flag_split_stack
7723 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7725 unsigned int scratch_regno;
7727 /* When we are splitting the stack, we can't refer to the stack
7728 arguments using internal_arg_pointer, because they may be on
7729 the old stack. The split stack prologue will arrange to
7730 leave a pointer to the old stack arguments in a scratch
7731 register, which we here copy to a pseudo-register. The split
7732 stack prologue can't set the pseudo-register directly because
7733 it (the prologue) runs before any registers have been saved. */
7735 scratch_regno = split_stack_prologue_scratch_regno ();
7736 if (scratch_regno != INVALID_REGNUM)
7740 reg = gen_reg_rtx (Pmode);
7741 cfun->machine->split_stack_varargs_pointer = reg;
7744 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7748 push_topmost_sequence ();
7749 emit_insn_after (seq, entry_of_function ());
7750 pop_topmost_sequence ();
7754 /* Only 64bit target needs something special. */
7755 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7757 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7758 std_expand_builtin_va_start (valist, nextarg);
7763 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7764 next = expand_binop (ptr_mode, add_optab,
7765 cfun->machine->split_stack_varargs_pointer,
7766 crtl->args.arg_offset_rtx,
7767 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7768 convert_move (va_r, next, 0);
7773 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7774 f_fpr = DECL_CHAIN (f_gpr);
7775 f_ovf = DECL_CHAIN (f_fpr);
7776 f_sav = DECL_CHAIN (f_ovf);
7778 valist = build_simple_mem_ref (valist);
7779 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7780 /* The following should be folded into the MEM_REF offset. */
7781 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7783 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7785 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7787 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7790 /* Count number of gp and fp argument registers used. */
7791 words = crtl->args.info.words;
7792 n_gpr = crtl->args.info.regno;
7793 n_fpr = crtl->args.info.sse_regno;
7795 if (cfun->va_list_gpr_size)
7797 type = TREE_TYPE (gpr);
7798 t = build2 (MODIFY_EXPR, type,
7799 gpr, build_int_cst (type, n_gpr * 8));
7800 TREE_SIDE_EFFECTS (t) = 1;
7801 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7804 if (TARGET_SSE && cfun->va_list_fpr_size)
7806 type = TREE_TYPE (fpr);
7807 t = build2 (MODIFY_EXPR, type, fpr,
7808 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7809 TREE_SIDE_EFFECTS (t) = 1;
7810 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
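/* A concrete example (illustrative): for

     void f (int a, double b, ...);

   one GPR and one SSE register are consumed by the named arguments,
   so va_start stores gp_offset = 1 * 8 = 8 and
   fp_offset = 1 * 16 + 8 * X86_64_REGPARM_MAX, i.e. 16 + 48 = 64
   with the usual X86_64_REGPARM_MAX of 6.  */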
7813 /* Find the overflow area. */
7814 type = TREE_TYPE (ovf);
7815 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7816 ovf_rtx = crtl->args.internal_arg_pointer;
7818 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7819 t = make_tree (type, ovf_rtx);
7821 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7822 t = build2 (MODIFY_EXPR, type, ovf, t);
7823 TREE_SIDE_EFFECTS (t) = 1;
7824 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7826 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7828 /* Find the register save area.
7829 The function prologue saves it right above the stack frame. */
7830 type = TREE_TYPE (sav);
7831 t = make_tree (type, frame_pointer_rtx);
7832 if (!ix86_varargs_gpr_size)
7833 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7834 t = build2 (MODIFY_EXPR, type, sav, t);
7835 TREE_SIDE_EFFECTS (t) = 1;
7836 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7840 /* Implement va_arg. */
7843 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7846 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7847 tree f_gpr, f_fpr, f_ovf, f_sav;
7848 tree gpr, fpr, ovf, sav, t;
7850 tree lab_false, lab_over = NULL_TREE;
7855 enum machine_mode nat_mode;
7856 unsigned int arg_boundary;
7858 /* Only 64bit target needs something special. */
7859 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7860 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7862 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7863 f_fpr = DECL_CHAIN (f_gpr);
7864 f_ovf = DECL_CHAIN (f_fpr);
7865 f_sav = DECL_CHAIN (f_ovf);
7867 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7868 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7869 valist = build_va_arg_indirect_ref (valist);
7870 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7871 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7872 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7874 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7876 type = build_pointer_type (type);
7877 size = int_size_in_bytes (type);
7878 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
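/* E.g. a 12-byte type on 64-bit (UNITS_PER_WORD == 8) gives
   rsize == 2; the overflow area is always consumed in whole words.  */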
7880 nat_mode = type_natural_mode (type, NULL);
7889 /* Unnamed 256bit vector mode parameters are passed on stack. */
7890 if (!TARGET_64BIT_MS_ABI)
7897 container = construct_container (nat_mode, TYPE_MODE (type),
7898 type, 0, X86_64_REGPARM_MAX,
7899 X86_64_SSE_REGPARM_MAX, intreg,
7904 /* Pull the value out of the saved registers. */
7906 addr = create_tmp_var (ptr_type_node, "addr");
7910 int needed_intregs, needed_sseregs;
7912 tree int_addr, sse_addr;
7914 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7915 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7917 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7919 need_temp = (!REG_P (container)
7920 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7921 || TYPE_ALIGN (type) > 128));
7923 /* In case we are passing a structure, verify that it is a consecutive block
7924 on the register save area. If not, we need to do moves. */
7925 if (!need_temp && !REG_P (container))
7927 /* Verify that all registers are strictly consecutive. */
7928 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7932 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7934 rtx slot = XVECEXP (container, 0, i);
7935 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7936 || INTVAL (XEXP (slot, 1)) != i * 16)
7944 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7946 rtx slot = XVECEXP (container, 0, i);
7947 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7948 || INTVAL (XEXP (slot, 1)) != i * 8)
7960 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7961 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7964 /* First ensure that we fit completely in registers. */
7967 t = build_int_cst (TREE_TYPE (gpr),
7968 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7969 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7970 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7971 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7972 gimplify_and_add (t, pre_p);
7976 t = build_int_cst (TREE_TYPE (fpr),
7977 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7978 + X86_64_REGPARM_MAX * 8);
7979 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7980 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7981 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7982 gimplify_and_add (t, pre_p);
7985 /* Compute index to start of area used for integer regs. */
7988 /* int_addr = gpr + sav; */
7989 t = fold_build_pointer_plus (sav, gpr);
7990 gimplify_assign (int_addr, t, pre_p);
7994 /* sse_addr = fpr + sav; */
7995 t = fold_build_pointer_plus (sav, fpr);
7996 gimplify_assign (sse_addr, t, pre_p);
8000 int i, prev_size = 0;
8001 tree temp = create_tmp_var (type, "va_arg_tmp");
8004 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8005 gimplify_assign (addr, t, pre_p);
8007 for (i = 0; i < XVECLEN (container, 0); i++)
8009 rtx slot = XVECEXP (container, 0, i);
8010 rtx reg = XEXP (slot, 0);
8011 enum machine_mode mode = GET_MODE (reg);
8017 tree dest_addr, dest;
8018 int cur_size = GET_MODE_SIZE (mode);
8020 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8021 prev_size = INTVAL (XEXP (slot, 1));
8022 if (prev_size + cur_size > size)
8024 cur_size = size - prev_size;
8025 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8026 if (mode == BLKmode)
8029 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8030 if (mode == GET_MODE (reg))
8031 addr_type = build_pointer_type (piece_type);
8033 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8035 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8038 if (SSE_REGNO_P (REGNO (reg)))
8040 src_addr = sse_addr;
8041 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8045 src_addr = int_addr;
8046 src_offset = REGNO (reg) * 8;
8048 src_addr = fold_convert (addr_type, src_addr);
8049 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8051 dest_addr = fold_convert (daddr_type, addr);
8052 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8053 if (cur_size == GET_MODE_SIZE (mode))
8055 src = build_va_arg_indirect_ref (src_addr);
8056 dest = build_va_arg_indirect_ref (dest_addr);
8058 gimplify_assign (dest, src, pre_p);
8063 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8064 3, dest_addr, src_addr,
8065 size_int (cur_size));
8066 gimplify_and_add (copy, pre_p);
8068 prev_size += cur_size;
8074 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8075 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8076 gimplify_assign (gpr, t, pre_p);
8081 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8082 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8083 gimplify_assign (fpr, t, pre_p);
8086 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8088 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8091 /* ... otherwise out of the overflow area. */
8093 /* When we align a parameter on the stack for the caller, if the
8094 parameter's alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it
8095 will only be aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the
8096 callee here with the caller. */
8097 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8098 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8099 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8101 /* Care for on-stack alignment if needed. */
8102 if (arg_boundary <= 64 || size == 0)
8106 HOST_WIDE_INT align = arg_boundary / 8;
8107 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8108 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8109 build_int_cst (TREE_TYPE (t), -align));
8112 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8113 gimplify_assign (addr, t, pre_p);
8115 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8116 gimplify_assign (unshare_expr (ovf), t, pre_p);
8119 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8121 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8122 addr = fold_convert (ptrtype, addr);
8125 addr = build_va_arg_indirect_ref (addr);
8126 return build_va_arg_indirect_ref (addr);
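/* Illustrative sketch, not part of GCC: how the expansion above plays
   out at run time for the System V x86-64 register save area.  The
   layout assumed here mirrors X86_64_REGPARM_MAX == 6 general words of
   8 bytes followed by 16-byte SSE slots; the struct and helpers are
   hypothetical names invented for the example.  */

struct demo_va_list
{
  unsigned int gp_offset;   /* Byte offset into the save area, at most 6*8.  */
  unsigned int fp_offset;   /* Starts at 48 == 6*8; SSE slots follow.        */
  void *overflow_arg_area;  /* Stack home of arguments that did not fit.     */
  void *reg_save_area;      /* The "sav" base used by int_addr/sse_addr.     */
};

/* The moral equivalent of the int_addr = gpr + sav path above for one
   8-byte integer argument, with the overflow-area fallback.  */
static long
demo_fetch_gp (struct demo_va_list *ap)
{
  long *slot;

  if (ap->gp_offset + 8 <= 6 * 8)
    {
      slot = (long *) ((char *) ap->reg_save_area + ap->gp_offset);
      ap->gp_offset += 8;       /* gpr = gpr + needed_intregs * 8 */
      return *slot;
    }

  /* ... otherwise out of the overflow area.  */
  slot = (long *) ap->overflow_arg_area;
  ap->overflow_arg_area = slot + 1;
  return *slot;
}

/* The on-stack alignment step above computes
   (ovf + align - 1) & -align, i.e. rounds the overflow pointer up.  */
static void *
demo_align_up (void *p, unsigned long align)
{
  return (void *) (((unsigned long) p + align - 1) & -align);
}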
8129 /* Return true if OPNUM's MEM should be matched
8130 in movabs* patterns. */
8133 ix86_check_movabs (rtx insn, int opnum)
8137 set = PATTERN (insn);
8138 if (GET_CODE (set) == PARALLEL)
8139 set = XVECEXP (set, 0, 0);
8140 gcc_assert (GET_CODE (set) == SET);
8141 mem = XEXP (set, opnum);
8142 while (GET_CODE (mem) == SUBREG)
8143 mem = SUBREG_REG (mem);
8144 gcc_assert (MEM_P (mem));
8145 return volatile_ok || !MEM_VOLATILE_P (mem);
8148 /* Initialize the table of extra 80387 mathematical constants. */
8151 init_ext_80387_constants (void)
8153 static const char * cst[5] =
8155 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8156 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8157 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8158 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8159 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8163 for (i = 0; i < 5; i++)
8165 real_from_string (&ext_80387_constants_table[i], cst[i]);
8166 /* Ensure each constant is rounded to XFmode precision. */
8167 real_convert (&ext_80387_constants_table[i],
8168 XFmode, &ext_80387_constants_table[i]);
8171 ext_80387_constants_init = 1;
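/* Standalone sanity check, assuming a hosted C99 <math.h>; an
   illustration of what the table holds, not code from this file.  The
   five strings above are log10(2), ln(2), log2(e), log2(10) and pi,
   i.e. exactly the values pushed by fldlg2, fldln2, fldl2e, fldl2t
   and fldpi.  */
#include <math.h>
#include <stdio.h>

static void
demo_print_ext_80387_constants (void)
{
  printf ("fldlg2 %.17g\n", log10 (2.0));      /* 0.30102999... */
  printf ("fldln2 %.17g\n", log (2.0));        /* 0.69314718... */
  printf ("fldl2e %.17g\n", 1.0 / log (2.0));  /* 1.44269504... */
  printf ("fldl2t %.17g\n", log2 (10.0));      /* 3.32192809... */
  printf ("fldpi  %.17g\n", acos (-1.0));      /* 3.14159265... */
}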
8174 /* Return non-zero if the constant is something that
8175 can be loaded with a special instruction. */
8178 standard_80387_constant_p (rtx x)
8180 enum machine_mode mode = GET_MODE (x);
8184 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8187 if (x == CONST0_RTX (mode))
8189 if (x == CONST1_RTX (mode))
8192 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8194 /* For XFmode constants, try to find a special 80387 instruction when
8195 optimizing for size or on those CPUs that benefit from them. */
8197 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8201 if (! ext_80387_constants_init)
8202 init_ext_80387_constants ();
8204 for (i = 0; i < 5; i++)
8205 if (real_identical (&r, &ext_80387_constants_table[i]))
8209 /* A load of the constant -0.0 or -1.0 will be split into an
8210 fldz;fchs or fld1;fchs sequence. */
8211 if (real_isnegzero (&r))
8213 if (real_identical (&r, &dconstm1))
8219 /* Return the opcode of the special instruction to be used to load the constant X. */
8223 standard_80387_constant_opcode (rtx x)
8225 switch (standard_80387_constant_p (x))
8249 /* Return the CONST_DOUBLE representing the 80387 constant that is
8250 loaded by the specified special instruction. The argument IDX
8251 matches the return value from standard_80387_constant_p. */
8254 standard_80387_constant_rtx (int idx)
8258 if (! ext_80387_constants_init)
8259 init_ext_80387_constants ();
8275 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8279 /* Return 1 if X is all 0s and 2 if X is all 1s
8280 in a supported SSE/AVX vector mode. */
8283 standard_sse_constant_p (rtx x)
8285 enum machine_mode mode = GET_MODE (x);
8287 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8289 if (vector_all_ones_operand (x, mode))
8311 /* Return the opcode of the special instruction to be used to load the constant X. */
8315 standard_sse_constant_opcode (rtx insn, rtx x)
8317 switch (standard_sse_constant_p (x))
8320 switch (get_attr_mode (insn))
8323 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8324 return "%vpxor\t%0, %d0";
8326 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8327 return "%vxorpd\t%0, %d0";
8329 return "%vxorps\t%0, %d0";
8332 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8333 return "vpxor\t%x0, %x0, %x0";
8335 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8336 return "vxorpd\t%x0, %x0, %x0";
8338 return "vxorps\t%x0, %x0, %x0";
8346 return "vpcmpeqd\t%0, %0, %0";
8348 return "pcmpeqd\t%0, %0";
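/* Illustrative sketch using SSE2 intrinsics (assumes <emmintrin.h>;
   standalone, not code from this file): why these two constants are
   "standard".  All zeros come from xoring a register with itself and
   all ones from comparing a register with itself for equality, which
   is what the xorps/pxor and pcmpeqd forms returned above encode.  */
#include <emmintrin.h>

static __m128i
demo_sse_all_zeros (void)
{
  return _mm_setzero_si128 ();          /* compiles to [v]pxor/xorps */
}

static __m128i
demo_sse_all_ones (void)
{
  __m128i x = _mm_setzero_si128 ();
  return _mm_cmpeq_epi32 (x, x);        /* [v]pcmpeqd: every lane equal */
}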
8356 /* Return true if OP contains a symbol reference. */
8359 symbolic_reference_mentioned_p (rtx op)
8364 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8367 fmt = GET_RTX_FORMAT (GET_CODE (op));
8368 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8374 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8375 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8379 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8386 /* Return true if it is appropriate to emit `ret' instructions in the
8387 body of a function. Do this only if the epilogue is simple, needing a
8388 couple of insns. Prior to reloading, we can't tell how many registers
8389 must be saved, so return false then. Return false if there is no frame
8390 marker to de-allocate. */
8393 ix86_can_use_return_insn_p (void)
8395 struct ix86_frame frame;
8397 if (! reload_completed || frame_pointer_needed)
8400 /* Don't allow more than 32k pop, since that's all we can do
8401 with one instruction. */
8402 if (crtl->args.pops_args && crtl->args.size >= 32768)
8405 ix86_compute_frame_layout (&frame);
8406 return (frame.stack_pointer_offset == UNITS_PER_WORD
8407 && (frame.nregs + frame.nsseregs) == 0);
8410 /* Value should be nonzero if functions must have frame pointers.
8411 Zero means the frame pointer need not be set up (and parms may
8412 be accessed via the stack pointer) in functions that seem suitable. */
8415 ix86_frame_pointer_required (void)
8417 /* If we accessed previous frames, then the generated code expects
8418 to be able to access the saved ebp value in our frame. */
8419 if (cfun->machine->accesses_prev_frame)
8422 /* Several x86 OSes need a frame pointer for other reasons,
8423 usually pertaining to setjmp. */
8424 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8427 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
8428 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
8431 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
8432 stack allocation is 4GB. */
8433 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
8436 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8437 turns off the frame pointer by default. Turn it back on now if
8438 we do not have a leaf function. */
8439 if (TARGET_OMIT_LEAF_FRAME_POINTER
8440 && (!current_function_is_leaf
8441 || ix86_current_function_calls_tls_descriptor))
8444 if (crtl->profile && !flag_fentry)
8450 /* Record that the current function accesses previous call frames. */
8453 ix86_setup_frame_addresses (void)
8455 cfun->machine->accesses_prev_frame = 1;
8458 #ifndef USE_HIDDEN_LINKONCE
8459 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8460 # define USE_HIDDEN_LINKONCE 1
8462 # define USE_HIDDEN_LINKONCE 0
8466 static int pic_labels_used;
8468 /* Fills in the label name that should be used for a pc thunk for
8469 the given register. */
8472 get_pc_thunk_name (char name[32], unsigned int regno)
8474 gcc_assert (!TARGET_64BIT);
8476 if (USE_HIDDEN_LINKONCE)
8477 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
8479 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8483 /* This function generates code for -fpic that loads %ebx with
8484 the return address of the caller and then returns. */
8487 ix86_code_end (void)
8492 for (regno = AX_REG; regno <= SP_REG; regno++)
8497 if (!(pic_labels_used & (1 << regno)))
8500 get_pc_thunk_name (name, regno);
8502 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8503 get_identifier (name),
8504 build_function_type_list (void_type_node, NULL_TREE));
8505 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8506 NULL_TREE, void_type_node);
8507 TREE_PUBLIC (decl) = 1;
8508 TREE_STATIC (decl) = 1;
8513 switch_to_section (darwin_sections[text_coal_section]);
8514 fputs ("\t.weak_definition\t", asm_out_file);
8515 assemble_name (asm_out_file, name);
8516 fputs ("\n\t.private_extern\t", asm_out_file);
8517 assemble_name (asm_out_file, name);
8518 putc ('\n', asm_out_file);
8519 ASM_OUTPUT_LABEL (asm_out_file, name);
8520 DECL_WEAK (decl) = 1;
8524 if (USE_HIDDEN_LINKONCE)
8526 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8528 targetm.asm_out.unique_section (decl, 0);
8529 switch_to_section (get_named_section (decl, NULL, 0));
8531 targetm.asm_out.globalize_label (asm_out_file, name);
8532 fputs ("\t.hidden\t", asm_out_file);
8533 assemble_name (asm_out_file, name);
8534 putc ('\n', asm_out_file);
8535 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8539 switch_to_section (text_section);
8540 ASM_OUTPUT_LABEL (asm_out_file, name);
8543 DECL_INITIAL (decl) = make_node (BLOCK);
8544 current_function_decl = decl;
8545 init_function_start (decl);
8546 first_function_block_is_cold = false;
8547 /* Make sure unwind info is emitted for the thunk if needed. */
8548 final_start_function (emit_barrier (), asm_out_file, 1);
8550 /* Pad stack IP move with 4 instructions (two NOPs count
8551 as one instruction). */
8552 if (TARGET_PAD_SHORT_FUNCTION)
8557 fputs ("\tnop\n", asm_out_file);
8560 xops[0] = gen_rtx_REG (Pmode, regno);
8561 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8562 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8563 fputs ("\tret\n", asm_out_file);
8564 final_end_function ();
8565 init_insn_lengths ();
8566 free_after_compilation (cfun);
8568 current_function_decl = NULL;
8571 if (flag_split_stack)
8572 file_end_indicate_split_stack ();
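/* Hypothetical standalone illustration for 32-bit x86, not GCC code
   (and not usable with -fPIC, where %ebx is reserved): the thunk
   emitted above reduces to loading the return address into the chosen
   register and returning.  A call site then adds the link-time
   constant _GLOBAL_OFFSET_TABLE_ to turn that PC into the GOT
   pointer.  */
__asm__ (".text\n"
         "demo_get_pc_thunk_bx:\n"
         "\tmovl (%esp), %ebx\n"
         "\tret\n");

static void *
demo_current_pc (void)
{
  void *pc;

  /* %ebx receives the address of the insn following the call.  */
  __asm__ volatile ("call demo_get_pc_thunk_bx" : "=b" (pc));
  return pc;
}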
8575 /* Emit code for the SET_GOT patterns. */
8578 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8584 if (TARGET_VXWORKS_RTP && flag_pic)
8586 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8587 xops[2] = gen_rtx_MEM (Pmode,
8588 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8589 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8591 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8592 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8593 an unadorned address. */
8594 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8595 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8596 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8600 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8604 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8606 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8609 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8610 is what will be referenced by the Mach-O PIC subsystem. */
8612 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8615 targetm.asm_out.internal_label (asm_out_file, "L",
8616 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8621 get_pc_thunk_name (name, REGNO (dest));
8622 pic_labels_used |= 1 << REGNO (dest);
8624 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8625 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8626 output_asm_insn ("call\t%X2", xops);
8627 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8628 is what will be referenced by the Mach-O PIC subsystem. */
8631 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8633 targetm.asm_out.internal_label (asm_out_file, "L",
8634 CODE_LABEL_NUMBER (label));
8639 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8644 /* Generate a "push" pattern for input ARG. */
8649 struct machine_function *m = cfun->machine;
8651 if (m->fs.cfa_reg == stack_pointer_rtx)
8652 m->fs.cfa_offset += UNITS_PER_WORD;
8653 m->fs.sp_offset += UNITS_PER_WORD;
8655 return gen_rtx_SET (VOIDmode,
8657 gen_rtx_PRE_DEC (Pmode,
8658 stack_pointer_rtx)),
8662 /* Generate a "pop" pattern for input ARG. */
8667 return gen_rtx_SET (VOIDmode,
8670 gen_rtx_POST_INC (Pmode,
8671 stack_pointer_rtx)));
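/* Hypothetical C analogy for the two patterns above, not GCC code: a
   push is a store through a pre-decremented stack pointer and a pop a
   load through a post-incremented one, matching the PRE_DEC and
   POST_INC address forms in the RTL.  */
static void
demo_push_word (unsigned long **sp, unsigned long v)
{
  *--*sp = v;           /* (set (mem (pre_dec sp)) arg) */
}

static unsigned long
demo_pop_word (unsigned long **sp)
{
  return *(*sp)++;      /* (set arg (mem (post_inc sp))) */
}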
8674 /* Return >= 0 if there is an unused call-clobbered register available
8675 for the entire function. */
8678 ix86_select_alt_pic_regnum (void)
8680 if (current_function_is_leaf
8682 && !ix86_current_function_calls_tls_descriptor)
8685 /* Can't use the same register for both PIC and DRAP. */
8687 drap = REGNO (crtl->drap_reg);
8690 for (i = 2; i >= 0; --i)
8691 if (i != drap && !df_regs_ever_live_p (i))
8695 return INVALID_REGNUM;
8698 /* Return TRUE if we need to save REGNO. */
8701 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8703 if (pic_offset_table_rtx
8704 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8705 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8707 || crtl->calls_eh_return
8708 || crtl->uses_const_pool))
8709 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8711 if (crtl->calls_eh_return && maybe_eh_return)
8716 unsigned test = EH_RETURN_DATA_REGNO (i);
8717 if (test == INVALID_REGNUM)
8724 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8727 return (df_regs_ever_live_p (regno)
8728 && !call_used_regs[regno]
8729 && !fixed_regs[regno]
8730 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8733 /* Return the number of saved general purpose registers. */
8736 ix86_nsaved_regs (void)
8741 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8742 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8747 /* Return the number of saved SSE registers. */
8750 ix86_nsaved_sseregs (void)
8755 if (!TARGET_64BIT_MS_ABI)
8757 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8758 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8763 /* Given FROM and TO register numbers, say whether this elimination is
8764 allowed. If stack alignment is needed, we can only replace argument
8765 pointer with hard frame pointer, or replace frame pointer with stack
8766 pointer. Otherwise, frame pointer elimination is automatically
8767 handled and all other eliminations are valid. */
8770 ix86_can_eliminate (const int from, const int to)
8772 if (stack_realign_fp)
8773 return ((from == ARG_POINTER_REGNUM
8774 && to == HARD_FRAME_POINTER_REGNUM)
8775 || (from == FRAME_POINTER_REGNUM
8776 && to == STACK_POINTER_REGNUM));
8778 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8781 /* Return the offset between two registers, one to be eliminated, and the other
8782 its replacement, at the start of a routine. */
8785 ix86_initial_elimination_offset (int from, int to)
8787 struct ix86_frame frame;
8788 ix86_compute_frame_layout (&frame);
8790 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8791 return frame.hard_frame_pointer_offset;
8792 else if (from == FRAME_POINTER_REGNUM
8793 && to == HARD_FRAME_POINTER_REGNUM)
8794 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8797 gcc_assert (to == STACK_POINTER_REGNUM);
8799 if (from == ARG_POINTER_REGNUM)
8800 return frame.stack_pointer_offset;
8802 gcc_assert (from == FRAME_POINTER_REGNUM);
8803 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8807 /* In a dynamically-aligned function, we can't know the offset from
8808 stack pointer to frame pointer, so we must ensure that setjmp
8809 eliminates fp against the hard fp (%ebp) rather than trying to
8810 index from %esp up to the top of the frame across a gap that is
8811 of unknown (at compile-time) size. */
8813 ix86_builtin_setjmp_frame_value (void)
8815 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8818 /* When using -fsplit-stack, the allocation routines set a field in
8819 the TCB to the bottom of the stack plus this much space, measured in bytes. */
8822 #define SPLIT_STACK_AVAILABLE 256
8824 /* Fill structure ix86_frame about frame of currently computed function. */
8827 ix86_compute_frame_layout (struct ix86_frame *frame)
8829 unsigned int stack_alignment_needed;
8830 HOST_WIDE_INT offset;
8831 unsigned int preferred_alignment;
8832 HOST_WIDE_INT size = get_frame_size ();
8833 HOST_WIDE_INT to_allocate;
8835 frame->nregs = ix86_nsaved_regs ();
8836 frame->nsseregs = ix86_nsaved_sseregs ();
8838 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8839 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8841 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
8842 except for function prologues and leaf functions. */
8843 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8844 && (!current_function_is_leaf || cfun->calls_alloca != 0
8845 || ix86_current_function_calls_tls_descriptor))
8847 preferred_alignment = 16;
8848 stack_alignment_needed = 16;
8849 crtl->preferred_stack_boundary = 128;
8850 crtl->stack_alignment_needed = 128;
8853 gcc_assert (!size || stack_alignment_needed);
8854 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8855 gcc_assert (preferred_alignment <= stack_alignment_needed);
8857 /* For SEH we have to limit the amount of code movement into the prologue.
8858 At present we do this via a BLOCKAGE, at which point there's very little
8859 scheduling that can be done, which means that there's very little point
8860 in doing anything except PUSHs. */
8862 cfun->machine->use_fast_prologue_epilogue = false;
8864 /* During reload iteration the number of registers saved can change.
8865 Recompute the value as needed. Do not recompute when the number of
8866 registers didn't change, as reload calls this function multiple times
8867 and does not expect the decision to change within a single iteration. */
8868 else if (!optimize_function_for_size_p (cfun)
8869 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8871 int count = frame->nregs;
8872 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8874 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8876 /* The fast prologue uses move instead of push to save registers. This
8877 is significantly longer, but also executes faster as modern hardware
8878 can execute the moves in parallel, but can't do that for push/pop.
8880 Be careful about choosing which prologue to emit: when the function
8881 takes many instructions to execute, we may use the slow version, as
8882 well as when the function is known to be outside a hot spot (this is
8883 known with feedback only). Weight the size of the function by the
8884 number of registers to save, as it is cheap to use one or two push
8885 instructions but very slow to use many of them. */
8887 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8888 if (node->frequency < NODE_FREQUENCY_NORMAL
8889 || (flag_branch_probabilities
8890 && node->frequency < NODE_FREQUENCY_HOT))
8891 cfun->machine->use_fast_prologue_epilogue = false;
8893 cfun->machine->use_fast_prologue_epilogue
8894 = !expensive_function_p (count);
8897 frame->save_regs_using_mov
8898 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
8899 /* If static stack checking is enabled and done with probes,
8900 the registers need to be saved before allocating the frame. */
8901 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
8903 /* Skip return address. */
8904 offset = UNITS_PER_WORD;
8906 /* Skip pushed static chain. */
8907 if (ix86_static_chain_on_stack)
8908 offset += UNITS_PER_WORD;
8910 /* Skip saved base pointer. */
8911 if (frame_pointer_needed)
8912 offset += UNITS_PER_WORD;
8913 frame->hfp_save_offset = offset;
8915 /* The traditional frame pointer location is at the top of the frame. */
8916 frame->hard_frame_pointer_offset = offset;
8918 /* Register save area */
8919 offset += frame->nregs * UNITS_PER_WORD;
8920 frame->reg_save_offset = offset;
8922 /* On SEH target, registers are pushed just before the frame pointer location. */
8925 frame->hard_frame_pointer_offset = offset;
8927 /* Align and set SSE register save area. */
8928 if (frame->nsseregs)
8930 /* The only ABI that has saved SSE registers (Win64) also has a
8931 16-byte aligned default stack, and thus we don't need to be
8932 within the re-aligned local stack frame to save them. */
8933 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8934 offset = (offset + 16 - 1) & -16;
8935 offset += frame->nsseregs * 16;
8937 frame->sse_reg_save_offset = offset;
8939 /* The re-aligned stack starts here. Values before this point are not
8940 directly comparable with values below this point. In order to make
8941 sure that no value happens to be the same before and after, force
8942 the alignment computation below to add a non-zero value. */
8943 if (stack_realign_fp)
8944 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8947 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8948 offset += frame->va_arg_size;
8950 /* Align start of frame for local function. */
8951 if (stack_realign_fp
8952 || offset != frame->sse_reg_save_offset
8954 || !current_function_is_leaf
8955 || cfun->calls_alloca
8956 || ix86_current_function_calls_tls_descriptor)
8957 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8959 /* Frame pointer points here. */
8960 frame->frame_pointer_offset = offset;
8964 /* Add the outgoing arguments area. This can be skipped if we
8965 eliminated all the function calls as dead code.
8966 Skipping is, however, impossible when the function calls alloca, as
8967 the alloca expander assumes that the last crtl->outgoing_args_size
8968 bytes of the stack frame are unused. */
8969 if (ACCUMULATE_OUTGOING_ARGS
8970 && (!current_function_is_leaf || cfun->calls_alloca
8971 || ix86_current_function_calls_tls_descriptor))
8973 offset += crtl->outgoing_args_size;
8974 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8977 frame->outgoing_arguments_size = 0;
8979 /* Align stack boundary. Only needed if we're calling another function or using alloca. */
8981 if (!current_function_is_leaf || cfun->calls_alloca
8982 || ix86_current_function_calls_tls_descriptor)
8983 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8985 /* We've reached the end of the stack frame. */
8986 frame->stack_pointer_offset = offset;
8988 /* Size prologue needs to allocate. */
8989 to_allocate = offset - frame->sse_reg_save_offset;
8991 if ((!to_allocate && frame->nregs <= 1)
8992 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8993 frame->save_regs_using_mov = false;
8995 if (ix86_using_red_zone ()
8996 && current_function_sp_is_unchanging
8997 && current_function_is_leaf
8998 && !ix86_current_function_calls_tls_descriptor)
9000 frame->red_zone_size = to_allocate;
9001 if (frame->save_regs_using_mov)
9002 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9003 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9004 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9007 frame->red_zone_size = 0;
9008 frame->stack_pointer_offset -= frame->red_zone_size;
9010 /* The SEH frame pointer location is near the bottom of the frame.
9011 This is enforced by the fact that the difference between the
9012 stack pointer and the frame pointer is limited to 240 bytes in
9013 the unwind data structure. */
9018 /* If we can leave the frame pointer where it is, do so. It also
9019 serves as the establisher frame for __builtin_frame_address (0). */
9020 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9021 if (diff <= SEH_MAX_FRAME_SIZE
9022 && (diff > 240 || (diff & 15) != 0)
9023 && !crtl->accesses_prior_frames)
9025 /* Ideally we'd determine what portion of the local stack frame
9026 (within the constraint of the lowest 240) is most heavily used.
9027 But without that complication, simply bias the frame pointer
9028 by 128 bytes so as to maximize the amount of the local stack
9029 frame that is addressable with 8-bit offsets. */
9030 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
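/* Worked toy example of the offset accumulation above, not GCC code.
   Assumptions: 64-bit, UNITS_PER_WORD == 8, no frame pointer, no SSE
   saves, 16-byte stack alignment, two saved registers and 40 bytes of
   locals.  */
static void
demo_frame_layout (void)
{
  long offset, sse_reg_save_offset;

  offset = 8;                          /* return address          */
  offset += 2 * 8;                     /* reg_save_offset == 24   */
  sse_reg_save_offset = offset;        /* no SSE saves: still 24  */
  offset = (offset + 16 - 1) & -16;    /* align frame start: 32   */
  offset += 40;                        /* locals                  */
  offset = (offset + 16 - 1) & -16;    /* preferred align: 80     */

  /* stack_pointer_offset == 80, so the prologue allocates
     offset - sse_reg_save_offset == 56 bytes beyond the two pushes.  */
  (void) sse_reg_save_offset;
}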
9035 /* This is semi-inlined memory_address_length, but simplified
9036 since we know that we're always dealing with reg+offset, and
9037 to avoid having to create and discard all that rtl. */
9040 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9046 /* EBP and R13 cannot be encoded without an offset. */
9047 len = (regno == BP_REG || regno == R13_REG);
9049 else if (IN_RANGE (offset, -128, 127))
9052 /* ESP and R12 must be encoded with a SIB byte. */
9053 if (regno == SP_REG || regno == R12_REG)
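/* Standalone restatement of the rule above (hypothetical helper, for
   illustration only).  EBP/R13 need a displacement byte even for
   offset 0, small offsets fit in a disp8, anything else takes a
   disp32, and ESP/R12 pay one extra SIB byte:

       (%rax)    -> 0      (%rbp)      -> 1  (disp8 of 0 forced)
       -8(%rax)  -> 1      1000(%rax)  -> 4
       (%rsp)    -> 1      1000(%rsp)  -> 5  (disp32 + SIB)  */
static int
demo_baseaddr_len (int is_bp_or_r13, int is_sp_or_r12, long offset)
{
  int len;

  if (offset == 0)
    len = is_bp_or_r13;
  else if (offset >= -128 && offset <= 127)
    len = 1;
  else
    len = 4;

  if (is_sp_or_r12)
    len++;
  return len;
}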
9059 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9060 The valid base registers are taken from CFUN->MACHINE->FS. */
9063 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9065 const struct machine_function *m = cfun->machine;
9066 rtx base_reg = NULL;
9067 HOST_WIDE_INT base_offset = 0;
9069 if (m->use_fast_prologue_epilogue)
9071 /* Choose the base register most likely to allow the most scheduling
9072 opportunities. Generally FP is valid throughout the function,
9073 while DRAP must be reloaded within the epilogue. But choose either
9074 over the SP due to increased encoding size. */
9078 base_reg = hard_frame_pointer_rtx;
9079 base_offset = m->fs.fp_offset - cfa_offset;
9081 else if (m->fs.drap_valid)
9083 base_reg = crtl->drap_reg;
9084 base_offset = 0 - cfa_offset;
9086 else if (m->fs.sp_valid)
9088 base_reg = stack_pointer_rtx;
9089 base_offset = m->fs.sp_offset - cfa_offset;
9094 HOST_WIDE_INT toffset;
9097 /* Choose the base register with the smallest address encoding.
9098 With a tie, choose FP > DRAP > SP. */
9101 base_reg = stack_pointer_rtx;
9102 base_offset = m->fs.sp_offset - cfa_offset;
9103 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9105 if (m->fs.drap_valid)
9107 toffset = 0 - cfa_offset;
9108 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9111 base_reg = crtl->drap_reg;
9112 base_offset = toffset;
9118 toffset = m->fs.fp_offset - cfa_offset;
9119 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9122 base_reg = hard_frame_pointer_rtx;
9123 base_offset = toffset;
9128 gcc_assert (base_reg != NULL);
9130 return plus_constant (base_reg, base_offset);
9133 /* Emit code to save registers in the prologue. */
9136 ix86_emit_save_regs (void)
9141 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9142 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9144 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9145 RTX_FRAME_RELATED_P (insn) = 1;
9149 /* Emit a single register save at CFA - CFA_OFFSET. */
9152 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9153 HOST_WIDE_INT cfa_offset)
9155 struct machine_function *m = cfun->machine;
9156 rtx reg = gen_rtx_REG (mode, regno);
9157 rtx mem, addr, base, insn;
9159 addr = choose_baseaddr (cfa_offset);
9160 mem = gen_frame_mem (mode, addr);
9162 /* For SSE saves, we need to indicate the 128-bit alignment. */
9163 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9165 insn = emit_move_insn (mem, reg);
9166 RTX_FRAME_RELATED_P (insn) = 1;
9169 if (GET_CODE (base) == PLUS)
9170 base = XEXP (base, 0);
9171 gcc_checking_assert (REG_P (base));
9173 /* When saving registers into a re-aligned local stack frame, avoid
9174 any tricky guessing by dwarf2out. */
9175 if (m->fs.realigned)
9177 gcc_checking_assert (stack_realign_drap);
9179 if (regno == REGNO (crtl->drap_reg))
9181 /* A bit of a hack. We force the DRAP register to be saved in
9182 the re-aligned stack frame, which provides us with a copy
9183 of the CFA that will last past the prologue. Install it. */
9184 gcc_checking_assert (cfun->machine->fs.fp_valid);
9185 addr = plus_constant (hard_frame_pointer_rtx,
9186 cfun->machine->fs.fp_offset - cfa_offset);
9187 mem = gen_rtx_MEM (mode, addr);
9188 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9192 /* The frame pointer is a stable reference within the
9193 aligned frame. Use it. */
9194 gcc_checking_assert (cfun->machine->fs.fp_valid);
9195 addr = plus_constant (hard_frame_pointer_rtx,
9196 cfun->machine->fs.fp_offset - cfa_offset);
9197 mem = gen_rtx_MEM (mode, addr);
9198 add_reg_note (insn, REG_CFA_EXPRESSION,
9199 gen_rtx_SET (VOIDmode, mem, reg));
9203 /* The memory may not be relative to the current CFA register,
9204 which means that we may need to generate a new pattern for
9205 use by the unwind info. */
9206 else if (base != m->fs.cfa_reg)
9208 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9209 mem = gen_rtx_MEM (mode, addr);
9210 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9214 /* Emit code to save registers using MOV insns.
9215 First register is stored at CFA - CFA_OFFSET. */
9217 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9221 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9222 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9224 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9225 cfa_offset -= UNITS_PER_WORD;
9229 /* Emit code to save SSE registers using MOV insns.
9230 First register is stored at CFA - CFA_OFFSET. */
9232 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9236 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9237 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9239 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9244 static GTY(()) rtx queued_cfa_restores;
9246 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9247 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9248 Don't add the note if the previously saved value will be left untouched
9249 within the stack red zone until return, as unwinders can find the same value
9250 in the register and on the stack. */
9253 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9255 if (!crtl->shrink_wrapped
9256 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9261 add_reg_note (insn, REG_CFA_RESTORE, reg);
9262 RTX_FRAME_RELATED_P (insn) = 1;
9266 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9269 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9272 ix86_add_queued_cfa_restore_notes (rtx insn)
9275 if (!queued_cfa_restores)
9277 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9279 XEXP (last, 1) = REG_NOTES (insn);
9280 REG_NOTES (insn) = queued_cfa_restores;
9281 queued_cfa_restores = NULL_RTX;
9282 RTX_FRAME_RELATED_P (insn) = 1;
9285 /* Expand prologue or epilogue stack adjustment.
9286 The pattern exists to put a dependency on all ebp-based memory accesses.
9287 STYLE should be negative if instructions should be marked as frame related,
9288 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
9292 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9293 int style, bool set_cfa)
9295 struct machine_function *m = cfun->machine;
9297 bool add_frame_related_expr = false;
9300 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9301 else if (x86_64_immediate_operand (offset, DImode))
9302 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9306 /* r11 is used by indirect sibcall return as well, set before the
9307 epilogue and used after the epilogue. */
9309 tmp = gen_rtx_REG (DImode, R11_REG);
9312 gcc_assert (src != hard_frame_pointer_rtx
9313 && dest != hard_frame_pointer_rtx);
9314 tmp = hard_frame_pointer_rtx;
9316 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9318 add_frame_related_expr = true;
9320 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9323 insn = emit_insn (insn);
9325 ix86_add_queued_cfa_restore_notes (insn);
9331 gcc_assert (m->fs.cfa_reg == src);
9332 m->fs.cfa_offset += INTVAL (offset);
9333 m->fs.cfa_reg = dest;
9335 r = gen_rtx_PLUS (Pmode, src, offset);
9336 r = gen_rtx_SET (VOIDmode, dest, r);
9337 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9338 RTX_FRAME_RELATED_P (insn) = 1;
9342 RTX_FRAME_RELATED_P (insn) = 1;
9343 if (add_frame_related_expr)
9345 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9346 r = gen_rtx_SET (VOIDmode, dest, r);
9347 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9351 if (dest == stack_pointer_rtx)
9353 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9354 bool valid = m->fs.sp_valid;
9356 if (src == hard_frame_pointer_rtx)
9358 valid = m->fs.fp_valid;
9359 ooffset = m->fs.fp_offset;
9361 else if (src == crtl->drap_reg)
9363 valid = m->fs.drap_valid;
9368 /* Else there are two possibilities: SP itself, which we set
9369 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9370 taken care of by hand along the eh_return path. */
9371 gcc_checking_assert (src == stack_pointer_rtx
9372 || offset == const0_rtx);
9375 m->fs.sp_offset = ooffset - INTVAL (offset);
9376 m->fs.sp_valid = valid;
9380 /* Find an available register to be used as the dynamic realign argument
9381 pointer register. Such a register will be written in the prologue and
9382 used at the beginning of the body, so it must not be
9383 1. a parameter passing register.
9385 We reuse the static-chain register if it is available. Otherwise, we
9386 use DI for i386 and R13 for x86-64. We chose R13 since it has a longer encoding.
9389 Return: the regno of the chosen register. */
9392 find_drap_reg (void)
9394 tree decl = cfun->decl;
9398 /* Use R13 for a nested function or a function that needs a static chain.
9399 Since a function with a tail call may use any caller-saved
9400 register in the epilogue, DRAP must not use a caller-saved
9401 register in such a case. */
9402 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9409 /* Use DI for a nested function or a function that needs a static chain.
9410 Since a function with a tail call may use any caller-saved
9411 register in the epilogue, DRAP must not use a caller-saved
9412 register in such a case. */
9413 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9416 /* Reuse the static chain register if it isn't used for parameter passing. */
9418 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9420 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9421 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9428 /* Return the minimum incoming stack alignment. */
9431 ix86_minimum_incoming_stack_boundary (bool sibcall)
9433 unsigned int incoming_stack_boundary;
9435 /* Prefer the one specified on the command line. */
9436 if (ix86_user_incoming_stack_boundary)
9437 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9438 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9439 if -mstackrealign is used, this isn't a sibcall check, and the
9440 estimated stack alignment is 128 bits. */
9443 && ix86_force_align_arg_pointer
9444 && crtl->stack_alignment_estimated == 128)
9445 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9447 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9449 /* Incoming stack alignment can be changed on individual functions
9450 via force_align_arg_pointer attribute. We use the smallest
9451 incoming stack boundary. */
9452 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9453 && lookup_attribute (ix86_force_align_arg_pointer_string,
9454 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9455 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9457 /* The incoming stack frame has to be aligned at least at
9458 parm_stack_boundary. */
9459 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9460 incoming_stack_boundary = crtl->parm_stack_boundary;
9462 /* The stack at the entrance of main is aligned by the runtime. We use
9463 the smallest incoming stack boundary. */
9464 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9465 && DECL_NAME (current_function_decl)
9466 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9467 && DECL_FILE_SCOPE_P (current_function_decl))
9468 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9470 return incoming_stack_boundary;
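/* Minimal sketch of the clamping above (hypothetical helper; boundary
   arguments are in bits, and 128 merely stands in for
   MAIN_STACK_BOUNDARY -- an assumption, not a definition from this
   file).  The command-line and -mstackrealign special cases are left
   out.  */
static unsigned int
demo_incoming_boundary (unsigned int preferred, unsigned int parm_boundary,
                        int function_is_main)
{
  unsigned int b = preferred;

  if (b < parm_boundary)          /* never below parm_stack_boundary */
    b = parm_boundary;
  if (function_is_main && b > 128)
    b = 128;                      /* the runtime aligns main's stack */
  return b;
}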
9473 /* Update incoming stack boundary and estimated stack alignment. */
9476 ix86_update_stack_boundary (void)
9478 ix86_incoming_stack_boundary
9479 = ix86_minimum_incoming_stack_boundary (false);
9481 /* x86_64 varargs need 16-byte stack alignment for the register save area. */
9485 && crtl->stack_alignment_estimated < 128)
9486 crtl->stack_alignment_estimated = 128;
9489 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9490 needed or an rtx for DRAP otherwise. */
9493 ix86_get_drap_rtx (void)
9495 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9496 crtl->need_drap = true;
9498 if (stack_realign_drap)
9500 /* Assign DRAP to vDRAP and return vDRAP. */
9501 unsigned int regno = find_drap_reg ();
9506 arg_ptr = gen_rtx_REG (Pmode, regno);
9507 crtl->drap_reg = arg_ptr;
9510 drap_vreg = copy_to_reg (arg_ptr);
9514 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9517 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9518 RTX_FRAME_RELATED_P (insn) = 1;
9526 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9529 ix86_internal_arg_pointer (void)
9531 return virtual_incoming_args_rtx;
9534 struct scratch_reg {
9539 /* Return a short-lived scratch register for use on function entry.
9540 In 32-bit mode, it is valid only after the registers are saved
9541 in the prologue. This register must be released by means of
9542 release_scratch_register_on_entry once it is dead. */
9545 get_scratch_register_on_entry (struct scratch_reg *sr)
9553 /* We always use R11 in 64-bit mode. */
9558 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9560 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9561 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9562 int regparm = ix86_function_regparm (fntype, decl);
9564 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9566 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9567 for the static chain register. */
9568 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9569 && drap_regno != AX_REG)
9571 else if (regparm < 2 && drap_regno != DX_REG)
9573 /* ecx is the static chain register. */
9574 else if (regparm < 3 && !fastcall_p && !static_chain_p
9575 && drap_regno != CX_REG)
9577 else if (ix86_save_reg (BX_REG, true))
9579 /* esi is the static chain register. */
9580 else if (!(regparm == 3 && static_chain_p)
9581 && ix86_save_reg (SI_REG, true))
9583 else if (ix86_save_reg (DI_REG, true))
9587 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9592 sr->reg = gen_rtx_REG (Pmode, regno);
9595 rtx insn = emit_insn (gen_push (sr->reg));
9596 RTX_FRAME_RELATED_P (insn) = 1;
9600 /* Release a scratch register obtained from the preceding function. */
9603 release_scratch_register_on_entry (struct scratch_reg *sr)
9607 rtx x, insn = emit_insn (gen_pop (sr->reg));
9609 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9610 RTX_FRAME_RELATED_P (insn) = 1;
9611 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9612 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9613 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9617 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9619 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9622 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9624 /* We skip the probe for the first interval + a small dope of 4 words and
9625 probe that many bytes past the specified size to maintain a protection
9626 area at the bottom of the stack. */
9627 const int dope = 4 * UNITS_PER_WORD;
9628 rtx size_rtx = GEN_INT (size), last;
9630 /* See if we have a constant small number of probes to generate. If so,
9631 that's the easy case. The run-time loop is made up of 11 insns in the
9632 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9633 for n # of intervals. */
9634 if (size <= 5 * PROBE_INTERVAL)
9636 HOST_WIDE_INT i, adjust;
9637 bool first_probe = true;
9639 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9640 values of N from 1 until it exceeds SIZE. If only one probe is
9641 needed, this will not generate any code. Then adjust and probe
9642 to PROBE_INTERVAL + SIZE. */
9643 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9647 adjust = 2 * PROBE_INTERVAL + dope;
9648 first_probe = false;
9651 adjust = PROBE_INTERVAL;
9653 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9654 plus_constant (stack_pointer_rtx, -adjust)));
9655 emit_stack_probe (stack_pointer_rtx);
9659 adjust = size + PROBE_INTERVAL + dope;
9661 adjust = size + PROBE_INTERVAL - i;
9663 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9664 plus_constant (stack_pointer_rtx, -adjust)));
9665 emit_stack_probe (stack_pointer_rtx);
9667 /* Adjust back to account for the additional first interval. */
9668 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9669 plus_constant (stack_pointer_rtx,
9670 PROBE_INTERVAL + dope)));
9673 /* Otherwise, do the same as above, but in a loop. Note that we must be
9674 extra careful with variables wrapping around because we might be at
9675 the very top (or the very bottom) of the address space and we have
9676 to be able to handle this case properly; in particular, we use an
9677 equality test for the loop condition. */
9680 HOST_WIDE_INT rounded_size;
9681 struct scratch_reg sr;
9683 get_scratch_register_on_entry (&sr);
9686 /* Step 1: round SIZE to the previous multiple of the interval. */
9688 rounded_size = size & -PROBE_INTERVAL;
9691 /* Step 2: compute initial and final value of the loop counter. */
9693 /* SP = SP_0 + PROBE_INTERVAL. */
9694 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9695 plus_constant (stack_pointer_rtx,
9696 - (PROBE_INTERVAL + dope))));
9698 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9699 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9700 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9701 gen_rtx_PLUS (Pmode, sr.reg,
9702 stack_pointer_rtx)));
9707 while (SP != LAST_ADDR)
9709 SP = SP + PROBE_INTERVAL
9713 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9714 values of N from 1 until it is equal to ROUNDED_SIZE. */
9716 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9719 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9720 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9722 if (size != rounded_size)
9724 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9725 plus_constant (stack_pointer_rtx,
9726 rounded_size - size)));
9727 emit_stack_probe (stack_pointer_rtx);
9730 /* Adjust back to account for the additional first interval. */
9731 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9732 plus_constant (stack_pointer_rtx,
9733 PROBE_INTERVAL + dope)));
9735 release_scratch_register_on_entry (&sr);
9738 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9740 /* Even if the stack pointer isn't the CFA register, we need to correctly
9741 describe the adjustments made to it, in particular differentiate the
9742 frame-related ones from the frame-unrelated ones. */
9745 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9746 XVECEXP (expr, 0, 0)
9747 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9748 plus_constant (stack_pointer_rtx, -size));
9749 XVECEXP (expr, 0, 1)
9750 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9751 plus_constant (stack_pointer_rtx,
9752 PROBE_INTERVAL + dope + size));
9753 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9754 RTX_FRAME_RELATED_P (last) = 1;
9756 cfun->machine->fs.sp_offset += size;
9759 /* Make sure nothing is scheduled before we are done. */
9760 emit_insn (gen_blockage ());
9763 /* Adjust the stack pointer up to REG while probing it. */
9766 output_adjust_stack_and_probe (rtx reg)
9768 static int labelno = 0;
9769 char loop_lab[32], end_lab[32];
9772 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9773 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9775 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9777 /* Jump to END_LAB if SP == LAST_ADDR. */
9778 xops[0] = stack_pointer_rtx;
9780 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9781 fputs ("\tje\t", asm_out_file);
9782 assemble_name_raw (asm_out_file, end_lab);
9783 fputc ('\n', asm_out_file);
9785 /* SP = SP + PROBE_INTERVAL. */
9786 xops[1] = GEN_INT (PROBE_INTERVAL);
9787 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9790 xops[1] = const0_rtx;
9791 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9793 fprintf (asm_out_file, "\tjmp\t");
9794 assemble_name_raw (asm_out_file, loop_lab);
9795 fputc ('\n', asm_out_file);
9797 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9802 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9803 inclusive. These are offsets from the current stack pointer. */
9806 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9808 /* See if we have a constant small number of probes to generate. If so,
9809 that's the easy case. The run-time loop is made up of 7 insns in the
9810 generic case while the compile-time loop is made up of n insns for n # of intervals. */
9812 if (size <= 7 * PROBE_INTERVAL)
9816 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9817 it exceeds SIZE. If only one probe is needed, this will not
9818 generate any code. Then probe at FIRST + SIZE. */
9819 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9820 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9822 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9825 /* Otherwise, do the same as above, but in a loop. Note that we must be
9826 extra careful with variables wrapping around because we might be at
9827 the very top (or the very bottom) of the address space and we have
9828 to be able to handle this case properly; in particular, we use an
9829 equality test for the loop condition. */
9832 HOST_WIDE_INT rounded_size, last;
9833 struct scratch_reg sr;
9835 get_scratch_register_on_entry (&sr);
9838 /* Step 1: round SIZE to the previous multiple of the interval. */
9840 rounded_size = size & -PROBE_INTERVAL;
9843 /* Step 2: compute initial and final value of the loop counter. */
9845 /* TEST_OFFSET = FIRST. */
9846 emit_move_insn (sr.reg, GEN_INT (-first));
9848 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9849 last = first + rounded_size;
9854 while (TEST_ADDR != LAST_ADDR)
9856 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9860 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9861 until it is equal to ROUNDED_SIZE. */
9863 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9866 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9867 that SIZE is equal to ROUNDED_SIZE. */
9869 if (size != rounded_size)
9870 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9873 rounded_size - size));
9875 release_scratch_register_on_entry (&sr);
9878 /* Make sure nothing is scheduled before we are done. */
9879 emit_insn (gen_blockage ());
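/* Toy model of the probe placement above, not GCC code; 4096 stands in
   for the usual PROBE_INTERVAL.  Small sizes are fully unrolled, large
   sizes round SIZE down to a multiple of the interval for the loop and
   probe any remainder separately, exactly as in steps 1-4.  */
static void
demo_probe_offsets (long first, long size, void (*probe) (long))
{
  const long interval = 4096;
  long i, rounded;

  if (size <= 7 * interval)
    {
      for (i = interval; i < size; i += interval)
        probe (first + i);                     /* unrolled probes */
      probe (first + size);
    }
  else
    {
      rounded = size & -interval;              /* step 1 */
      for (i = interval; i <= rounded; i += interval)
        probe (first + i);                     /* steps 2 and 3: the loop */
      if (size != rounded)
        probe (first + size);                  /* step 4: the remainder */
    }
}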
9882 /* Probe a range of stack addresses from REG to END, inclusive. These are
9883 offsets from the current stack pointer. */
9886 output_probe_stack_range (rtx reg, rtx end)
9888 static int labelno = 0;
9889 char loop_lab[32], end_lab[32];
9892 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9893 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9895 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9897 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9900 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9901 fputs ("\tje\t", asm_out_file);
9902 assemble_name_raw (asm_out_file, end_lab);
9903 fputc ('\n', asm_out_file);
9905 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9906 xops[1] = GEN_INT (PROBE_INTERVAL);
9907 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9909 /* Probe at TEST_ADDR. */
9910 xops[0] = stack_pointer_rtx;
9912 xops[2] = const0_rtx;
9913 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9915 fprintf (asm_out_file, "\tjmp\t");
9916 assemble_name_raw (asm_out_file, loop_lab);
9917 fputc ('\n', asm_out_file);
9919 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9924 /* Finalize the stack_realign_needed flag, which will guide the prologue
9925 and epilogue to be generated in the correct form. */
9927 ix86_finalize_stack_realign_flags (void)
9929 /* Check if stack realignment is really needed after reload, and
9930 store the result in cfun. */
9931 unsigned int incoming_stack_boundary
9932 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9933 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9934 unsigned int stack_realign = (incoming_stack_boundary
9935 < (current_function_is_leaf
9936 ? crtl->max_used_stack_slot_alignment
9937 : crtl->stack_alignment_needed));
9939 if (crtl->stack_realign_finalized)
9941 /* After stack_realign_needed is finalized, we can no longer change it. */
9943 gcc_assert (crtl->stack_realign_needed == stack_realign);
9947 /* If the only reason for frame_pointer_needed is that we conservatively
9948 assumed stack realignment might be needed, but in the end nothing that
9949 needed the stack alignment had been spilled, clear frame_pointer_needed
9950 and say we don't need stack realignment. */
9953 && frame_pointer_needed
9954 && current_function_is_leaf
9955 && flag_omit_frame_pointer
9956 && current_function_sp_is_unchanging
9957 && !ix86_current_function_calls_tls_descriptor
9958 && !crtl->accesses_prior_frames
9959 && !cfun->calls_alloca
9960 && !crtl->calls_eh_return
9961 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
9962 && !ix86_frame_pointer_required ()
9963 && get_frame_size () == 0
9964 && ix86_nsaved_sseregs () == 0
9965 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
9967 HARD_REG_SET set_up_by_prologue, prologue_used;
9970 CLEAR_HARD_REG_SET (prologue_used);
9971 CLEAR_HARD_REG_SET (set_up_by_prologue);
9972 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
9973 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
9974 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
9975 HARD_FRAME_POINTER_REGNUM);
9979 FOR_BB_INSNS (bb, insn)
9980 if (NONDEBUG_INSN_P (insn)
9981 && requires_stack_frame_p (insn, prologue_used,
9982 set_up_by_prologue))
9984 crtl->stack_realign_needed = stack_realign;
9985 crtl->stack_realign_finalized = true;
9990 frame_pointer_needed = false;
9991 stack_realign = false;
9992 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
9993 crtl->stack_alignment_needed = incoming_stack_boundary;
9994 crtl->stack_alignment_estimated = incoming_stack_boundary;
9995 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
9996 crtl->preferred_stack_boundary = incoming_stack_boundary;
9997 df_finish_pass (true);
9998 df_scan_alloc (NULL);
10000 df_compute_regs_ever_live (true);
10004 crtl->stack_realign_needed = stack_realign;
10005 crtl->stack_realign_finalized = true;
10008 /* Expand the prologue into a bunch of separate insns. */
10011 ix86_expand_prologue (void)
10013 struct machine_function *m = cfun->machine;
10016 struct ix86_frame frame;
10017 HOST_WIDE_INT allocate;
10018 bool int_registers_saved;
10019 bool sse_registers_saved;
10021 ix86_finalize_stack_realign_flags ();
10023 /* DRAP should not coexist with stack_realign_fp. */
10024 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10026 memset (&m->fs, 0, sizeof (m->fs));
10028 /* Initialize CFA state for before the prologue. */
10029 m->fs.cfa_reg = stack_pointer_rtx;
10030 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10032 /* Track the SP offset to the CFA. We continue tracking this after we've
10033 swapped the CFA register away from SP. In the case of re-alignment
10034 this is fudged; we're interested in offsets within the local frame. */
10035 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10036 m->fs.sp_valid = true;
10038 ix86_compute_frame_layout (&frame);
10040 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10042 /* We should have already generated an error for any use of
10043 ms_hook on a nested function. */
10044 gcc_checking_assert (!ix86_static_chain_on_stack);
10046 /* Check if profiling is active and we shall use the
10047 profiling-before-prologue variant. If so, issue a sorry. */
10048 if (crtl->profile && flag_fentry != 0)
10049 sorry ("ms_hook_prologue attribute isn%'t compatible "
10050 "with -mfentry for 32-bit");
10052 /* In ix86_asm_output_function_label we emitted:
10053 8b ff movl.s %edi,%edi
10055 8b ec movl.s %esp,%ebp
10057 This matches the hookable function prologue in Win32 API
10058 functions in Microsoft Windows XP Service Pack 2 and newer.
10059 Wine uses this to enable Windows apps to hook the Win32 API
10060 functions provided by Wine.
10062 What that means is that we've already set up the frame pointer. */
10064 if (frame_pointer_needed
10065 && !(crtl->drap_reg && crtl->stack_realign_needed))
10069 /* We've decided to use the frame pointer already set up.
10070 Describe this to the unwinder by pretending that both
10071 push and mov insns happen right here.
10073 Putting the unwind info here at the end of the ms_hook
10074 is done so that we can make absolutely certain we get
10075 the required byte sequence at the start of the function,
10076 rather than relying on an assembler that can produce
10077 the exact encoding required.
10079 However it does mean (in the unpatched case) that we have
10080 a 1 insn window where the asynchronous unwind info is
10081 incorrect. However, if we placed the unwind info at
10082 its correct location we would have incorrect unwind info
10083 in the patched case. Which is probably all moot since
10084 I don't expect Wine generates dwarf2 unwind info for the
10085 system libraries that use this feature. */
10087 insn = emit_insn (gen_blockage ());
10089 push = gen_push (hard_frame_pointer_rtx);
10090 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10091 stack_pointer_rtx);
10092 RTX_FRAME_RELATED_P (push) = 1;
10093 RTX_FRAME_RELATED_P (mov) = 1;
10095 RTX_FRAME_RELATED_P (insn) = 1;
10096 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10097 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10099 /* Note that gen_push incremented m->fs.cfa_offset, even
10100 though we didn't emit the push insn here. */
10101 m->fs.cfa_reg = hard_frame_pointer_rtx;
10102 m->fs.fp_offset = m->fs.cfa_offset;
10103 m->fs.fp_valid = true;
10107 /* The frame pointer is not needed so pop %ebp again.
10108 This leaves us with a pristine state. */
10109 emit_insn (gen_pop (hard_frame_pointer_rtx));
10113 /* The first insn of a function that accepts its static chain on the
10114 stack is to push the register that would be filled in by a direct
10115 call. This insn will be skipped by the trampoline. */
10116 else if (ix86_static_chain_on_stack)
10118 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10119 emit_insn (gen_blockage ());
10121 /* We don't want to interpret this push insn as a register save,
10122 only as a stack adjustment. The real copy of the register as
10123 a save will be done later, if needed. */
10124 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10125 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10126 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10127 RTX_FRAME_RELATED_P (insn) = 1;
10130 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10131 DRAP is needed and stack realignment is really needed after reload. */
10132 if (stack_realign_drap)
10134 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10136 /* Only need to push parameter pointer reg if it is caller saved. */
10137 if (!call_used_regs[REGNO (crtl->drap_reg)])
10139 /* Push the arg pointer reg. */
10140 insn = emit_insn (gen_push (crtl->drap_reg));
10141 RTX_FRAME_RELATED_P (insn) = 1;
10144 /* Grab the argument pointer. */
10145 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10146 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10147 RTX_FRAME_RELATED_P (insn) = 1;
10148 m->fs.cfa_reg = crtl->drap_reg;
10149 m->fs.cfa_offset = 0;
10151 /* Align the stack. */
10152 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10154 GEN_INT (-align_bytes)));
10155 RTX_FRAME_RELATED_P (insn) = 1;
10157 /* Replicate the return address on the stack so that the return
10158 address can be reached via the (argp - 1) slot. This is needed
10159 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10160 expand_builtin_return_addr, etc. */
10161 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10162 t = gen_frame_mem (Pmode, t);
10163 insn = emit_insn (gen_push (t));
10164 RTX_FRAME_RELATED_P (insn) = 1;
10166 /* For the purposes of frame and register save area addressing,
10167 we've started over with a new frame. */
10168 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10169 m->fs.realigned = true;
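/* A sketch of the 32-bit code the block above typically produces
   (illustrative, assuming %ecx as the DRAP register and 16-byte
   alignment):

	leal	4(%esp), %ecx		; grab the argument pointer
	andl	$-16, %esp		; align the stack
	pushl	-4(%ecx)		; replicate the return address

   after which the function behaves as if it had been entered with a
   properly aligned stack.  */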
10172 int_registers_saved = (frame.nregs == 0);
10173 sse_registers_saved = (frame.nsseregs == 0);
10175 if (frame_pointer_needed && !m->fs.fp_valid)
10177 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10178 slower on all targets. Also sdb doesn't like it. */
10179 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10180 RTX_FRAME_RELATED_P (insn) = 1;
10182 /* Push registers now, before setting the frame pointer
10183 on SEH target. */
10184 if (!int_registers_saved
10185 && TARGET_SEH
10186 && !frame.save_regs_using_mov)
10188 ix86_emit_save_regs ();
10189 int_registers_saved = true;
10190 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10193 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10195 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10196 RTX_FRAME_RELATED_P (insn) = 1;
10198 if (m->fs.cfa_reg == stack_pointer_rtx)
10199 m->fs.cfa_reg = hard_frame_pointer_rtx;
10200 m->fs.fp_offset = m->fs.sp_offset;
10201 m->fs.fp_valid = true;
10205 if (!int_registers_saved)
10207 /* If saving registers via PUSH, do so now. */
10208 if (!frame.save_regs_using_mov)
10210 ix86_emit_save_regs ();
10211 int_registers_saved = true;
10212 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10215 /* When using red zone we may start register saving before allocating
10216 the stack frame saving one cycle of the prologue. However, avoid
10217 doing this if we have to probe the stack; at least on x86_64 the
10218 stack probe can turn into a call that clobbers a red zone location. */
10219 else if (ix86_using_red_zone ()
10220 && (! TARGET_STACK_PROBE
10221 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10223 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10224 int_registers_saved = true;
10228 if (stack_realign_fp)
10230 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10231 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10233 /* The computation of the size of the re-aligned stack frame means
10234 that we must allocate the size of the register save area before
10235 performing the actual alignment. Otherwise we cannot guarantee
10236 that there's enough storage above the realignment point. */
10237 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10238 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10239 GEN_INT (m->fs.sp_offset
10240 - frame.sse_reg_save_offset),
10243 /* Align the stack. */
10244 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10246 GEN_INT (-align_bytes)));
10248 /* For the purposes of register save area addressing, the stack
10249 pointer is no longer valid. As for the value of sp_offset,
10250 see ix86_compute_frame_layout, which we need to match in order
10251 to pass verification of stack_pointer_offset at the end. */
10252 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10253 m->fs.sp_valid = false;
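/* Arithmetic sketch of the rounding above (illustrative values): with
   sp_offset == 44 and align_bytes == 16, (44 + 16) & -16 == 48.  The
   exact runtime adjustment depends on the incoming stack, but this
   expression mirrors the identical one in ix86_compute_frame_layout,
   which is what makes the stack_pointer_offset assertion later in the
   prologue hold.  */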
10256 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10258 if (flag_stack_usage_info)
10260 /* We start to count from ARG_POINTER. */
10261 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10263 /* If it was realigned, take into account the fake frame. */
10264 if (stack_realign_drap)
10266 if (ix86_static_chain_on_stack)
10267 stack_size += UNITS_PER_WORD;
10269 if (!call_used_regs[REGNO (crtl->drap_reg)])
10270 stack_size += UNITS_PER_WORD;
10272 /* This over-estimates by 1 minimal-stack-alignment-unit but
10273 mitigates that by counting in the new return address slot. */
10274 current_function_dynamic_stack_size
10275 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10278 current_function_static_stack_size = stack_size;
10281 /* On SEH target with very large frame size, allocate an area to save
10282 SSE registers (as the very large allocation won't be described). */
10284 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10285 && !sse_registers_saved)
10287 HOST_WIDE_INT sse_size =
10288 frame.sse_reg_save_offset - frame.reg_save_offset;
10290 gcc_assert (int_registers_saved);
10292 /* No need to do stack checking as the area will be immediately
10293 written. */
10294 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10295 GEN_INT (-sse_size), -1,
10296 m->fs.cfa_reg == stack_pointer_rtx);
10297 allocate -= sse_size;
10298 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10299 sse_registers_saved = true;
10302 /* The stack has already been decremented by the instruction calling us
10303 so probe if the size is non-negative to preserve the protection area. */
10304 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10306 /* We expect the registers to be saved when probes are used. */
10307 gcc_assert (int_registers_saved);
10309 if (STACK_CHECK_MOVING_SP)
10311 ix86_adjust_stack_and_probe (allocate);
10316 HOST_WIDE_INT size = allocate;
10318 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10319 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10321 if (TARGET_STACK_PROBE)
10322 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10324 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10330 else if (!ix86_target_stack_probe ()
10331 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10333 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10334 GEN_INT (-allocate), -1,
10335 m->fs.cfa_reg == stack_pointer_rtx);
10339 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10341 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10342 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
10343 bool eax_live = false;
10344 bool r10_live = false;
10347 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10348 if (!TARGET_64BIT_MS_ABI)
10349 eax_live = ix86_eax_live_at_start_p ();
10351 /* Note that SEH directives need to continue tracking the stack
10352 pointer even after the frame pointer has been set up. */
10355 insn = emit_insn (gen_push (eax));
10356 allocate -= UNITS_PER_WORD;
10357 if (sp_is_cfa_reg || TARGET_SEH)
10360 m->fs.cfa_offset += UNITS_PER_WORD;
10361 RTX_FRAME_RELATED_P (insn) = 1;
10367 r10 = gen_rtx_REG (Pmode, R10_REG);
10368 insn = emit_insn (gen_push (r10));
10369 allocate -= UNITS_PER_WORD;
10370 if (sp_is_cfa_reg || TARGET_SEH)
10373 m->fs.cfa_offset += UNITS_PER_WORD;
10374 RTX_FRAME_RELATED_P (insn) = 1;
10378 emit_move_insn (eax, GEN_INT (allocate));
10379 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10381 /* Use the fact that AX still contains ALLOCATE. */
10382 adjust_stack_insn = (TARGET_64BIT
10383 ? gen_pro_epilogue_adjust_stack_di_sub
10384 : gen_pro_epilogue_adjust_stack_si_sub);
10386 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10387 stack_pointer_rtx, eax));
10389 if (sp_is_cfa_reg || TARGET_SEH)
10392 m->fs.cfa_offset += allocate;
10393 RTX_FRAME_RELATED_P (insn) = 1;
10394 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10395 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10396 plus_constant (stack_pointer_rtx,
10399 m->fs.sp_offset += allocate;
10401 if (r10_live && eax_live)
10403 t = choose_baseaddr (m->fs.sp_offset - allocate);
10404 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10405 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10406 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10408 else if (eax_live || r10_live)
10410 t = choose_baseaddr (m->fs.sp_offset - allocate);
10411 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10414 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
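/* A sketch of the probed-allocation path above on Windows targets
   (illustrative; the exact worker symbol, e.g. ___chkstk_ms vs.
   __chkstk, depends on target and ABI):

	movl	$ALLOCATE, %eax
	call	___chkstk_ms		; touch each page of the new area
	subl	%eax, %esp		; the actual stack adjustment

   %eax (and %r10 in 64-bit mode) are pushed beforehand and reloaded
   from the frame afterwards when they carry incoming values.  */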
10416 /* If we haven't already set up the frame pointer, do so now. */
10417 if (frame_pointer_needed && !m->fs.fp_valid)
10419 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10420 GEN_INT (frame.stack_pointer_offset
10421 - frame.hard_frame_pointer_offset));
10422 insn = emit_insn (insn);
10423 RTX_FRAME_RELATED_P (insn) = 1;
10424 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10426 if (m->fs.cfa_reg == stack_pointer_rtx)
10427 m->fs.cfa_reg = hard_frame_pointer_rtx;
10428 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10429 m->fs.fp_valid = true;
10432 if (!int_registers_saved)
10433 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10434 if (!sse_registers_saved)
10435 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10437 pic_reg_used = false;
10438 if (pic_offset_table_rtx
10439 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10442 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10444 if (alt_pic_reg_used != INVALID_REGNUM)
10445 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10447 pic_reg_used = true;
10454 if (ix86_cmodel == CM_LARGE_PIC)
10456 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10457 rtx label = gen_label_rtx ();
10458 emit_label (label);
10459 LABEL_PRESERVE_P (label) = 1;
10460 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10461 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10462 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10463 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10464 pic_offset_table_rtx, tmp_reg));
10467 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10471 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10472 RTX_FRAME_RELATED_P (insn) = 1;
10473 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
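/* A sketch of what gen_set_got expands to for 32-bit ELF (illustrative;
   the thunk name varies across GCC versions, e.g. __i686.get_pc_thunk.bx
   or __x86.get_pc_thunk.bx):

	call	__x86.get_pc_thunk.bx	; thunk body: movl (%esp), %ebx; ret
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   leaving the PIC register pointing at the GOT.  */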
10477 /* In the pic_reg_used case, make sure that the got load isn't deleted
10478 when mcount needs it. Blockage to avoid call movement across mcount
10479 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10480 note. */
10481 if (crtl->profile && !flag_fentry && pic_reg_used)
10482 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10484 if (crtl->drap_reg && !crtl->stack_realign_needed)
10486 /* vDRAP is set up, but after reload it turns out stack realignment
10487 isn't necessary; here we emit the prologue to set up DRAP
10488 without the stack realignment adjustment. */
10489 t = choose_baseaddr (0);
10490 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10493 /* Prevent instructions from being scheduled into register save push
10494 sequence when access to the redzone area is done through frame pointer.
10495 The offset between the frame pointer and the stack pointer is calculated
10496 relative to the value of the stack pointer at the end of the function
10497 prologue, and moving instructions that access redzone area via frame
10498 pointer inside push sequence violates this assumption. */
10499 if (frame_pointer_needed && frame.red_zone_size)
10500 emit_insn (gen_memory_blockage ());
10502 /* Emit cld instruction if stringops are used in the function. */
10503 if (TARGET_CLD && ix86_current_function_needs_cld)
10504 emit_insn (gen_cld ());
10506 /* SEH requires that the prologue end within 256 bytes of the start of
10507 the function. Prevent instruction schedules that would extend that.
10508 Further, prevent alloca modifications to the stack pointer from being
10509 combined with prologue modifications. */
10511 emit_insn (gen_prologue_use (stack_pointer_rtx));
10514 /* Emit code to restore REG using a POP insn. */
10517 ix86_emit_restore_reg_using_pop (rtx reg)
10519 struct machine_function *m = cfun->machine;
10520 rtx insn = emit_insn (gen_pop (reg));
10522 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10523 m->fs.sp_offset -= UNITS_PER_WORD;
10525 if (m->fs.cfa_reg == crtl->drap_reg
10526 && REGNO (reg) == REGNO (crtl->drap_reg))
10528 /* Previously we'd represented the CFA as an expression
10529 like *(%ebp - 8). We've just popped that value from
10530 the stack, which means we need to reset the CFA to
10531 the drap register. This will remain until we restore
10532 the stack pointer. */
10533 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10534 RTX_FRAME_RELATED_P (insn) = 1;
10536 /* This means that the DRAP register is valid for addressing too. */
10537 m->fs.drap_valid = true;
10541 if (m->fs.cfa_reg == stack_pointer_rtx)
10543 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10544 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10545 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10546 RTX_FRAME_RELATED_P (insn) = 1;
10548 m->fs.cfa_offset -= UNITS_PER_WORD;
10551 /* When the frame pointer is the CFA, and we pop it, we are
10552 swapping back to the stack pointer as the CFA. This happens
10553 for stack frames that don't allocate other data, so we assume
10554 the stack pointer is now pointing at the return address, i.e.
10555 the function entry state, which makes the offset be 1 word. */
10556 if (reg == hard_frame_pointer_rtx)
10558 m->fs.fp_valid = false;
10559 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10561 m->fs.cfa_reg = stack_pointer_rtx;
10562 m->fs.cfa_offset -= UNITS_PER_WORD;
10564 add_reg_note (insn, REG_CFA_DEF_CFA,
10565 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10566 GEN_INT (m->fs.cfa_offset)));
10567 RTX_FRAME_RELATED_P (insn) = 1;
10572 /* Emit code to restore saved registers using POP insns. */
10575 ix86_emit_restore_regs_using_pop (void)
10577 unsigned int regno;
10579 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10580 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10581 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
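/* The "leave" insn emitted by ix86_emit_leave below is shorthand for the
   two-insn sequence (shown here for 32-bit, as a reminder rather than a
   quote from the md file):

	movl	%ebp, %esp
	popl	%ebp

   so afterwards the stack pointer is valid again, pointing one word past
   the slot the frame pointer was restored from; the bookkeeping in the
   function mirrors exactly that.  */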
10584 /* Emit code and notes for the LEAVE instruction. */
10587 ix86_emit_leave (void)
10589 struct machine_function *m = cfun->machine;
10590 rtx insn = emit_insn (ix86_gen_leave ());
10592 ix86_add_queued_cfa_restore_notes (insn);
10594 gcc_assert (m->fs.fp_valid);
10595 m->fs.sp_valid = true;
10596 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10597 m->fs.fp_valid = false;
10599 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10601 m->fs.cfa_reg = stack_pointer_rtx;
10602 m->fs.cfa_offset = m->fs.sp_offset;
10604 add_reg_note (insn, REG_CFA_DEF_CFA,
10605 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10606 RTX_FRAME_RELATED_P (insn) = 1;
10608 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10609 m->fs.fp_offset);
10612 /* Emit code to restore saved registers using MOV insns.
10613 First register is restored from CFA - CFA_OFFSET. */
10615 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10616 bool maybe_eh_return)
10618 struct machine_function *m = cfun->machine;
10619 unsigned int regno;
10621 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10622 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10624 rtx reg = gen_rtx_REG (Pmode, regno);
10627 mem = choose_baseaddr (cfa_offset);
10628 mem = gen_frame_mem (Pmode, mem);
10629 insn = emit_move_insn (reg, mem);
10631 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10633 /* Previously we'd represented the CFA as an expression
10634 like *(%ebp - 8). We've just popped that value from
10635 the stack, which means we need to reset the CFA to
10636 the drap register. This will remain until we restore
10637 the stack pointer. */
10638 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10639 RTX_FRAME_RELATED_P (insn) = 1;
10641 /* This means that the DRAP register is valid for addressing. */
10642 m->fs.drap_valid = true;
10645 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10647 cfa_offset -= UNITS_PER_WORD;
10651 /* Emit code to restore saved registers using MOV insns.
10652 First register is restored from CFA - CFA_OFFSET. */
10654 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10655 bool maybe_eh_return)
10657 unsigned int regno;
10659 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10660 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10662 rtx reg = gen_rtx_REG (V4SFmode, regno);
10665 mem = choose_baseaddr (cfa_offset);
10666 mem = gen_rtx_MEM (V4SFmode, mem);
10667 set_mem_align (mem, 128);
10668 emit_move_insn (reg, mem);
10670 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10676 /* Emit vzeroupper if needed. */
10679 ix86_maybe_emit_epilogue_vzeroupper (void)
10681 if (TARGET_VZEROUPPER
10682 && !TREE_THIS_VOLATILE (cfun->decl)
10683 && !cfun->machine->caller_return_avx256_p)
10684 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10687 /* Restore function stack, frame, and registers. */
10690 ix86_expand_epilogue (int style)
10692 struct machine_function *m = cfun->machine;
10693 struct machine_frame_state frame_state_save = m->fs;
10694 struct ix86_frame frame;
10695 bool restore_regs_via_mov;
10698 ix86_finalize_stack_realign_flags ();
10699 ix86_compute_frame_layout (&frame);
10701 m->fs.sp_valid = (!frame_pointer_needed
10702 || (current_function_sp_is_unchanging
10703 && !stack_realign_fp));
10704 gcc_assert (!m->fs.sp_valid
10705 || m->fs.sp_offset == frame.stack_pointer_offset);
10707 /* The FP must be valid if the frame pointer is present. */
10708 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10709 gcc_assert (!m->fs.fp_valid
10710 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10712 /* We must have *some* valid pointer to the stack frame. */
10713 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10715 /* The DRAP is never valid at this point. */
10716 gcc_assert (!m->fs.drap_valid);
10718 /* See the comment about red zone and frame
10719 pointer usage in ix86_expand_prologue. */
10720 if (frame_pointer_needed && frame.red_zone_size)
10721 emit_insn (gen_memory_blockage ());
10723 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10724 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10726 /* Determine the CFA offset of the end of the red-zone. */
10727 m->fs.red_zone_offset = 0;
10728 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10730 /* The red-zone begins below the return address. */
10731 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10733 /* When the register save area is in the aligned portion of
10734 the stack, determine the maximum runtime displacement that
10735 matches up with the aligned frame. */
10736 if (stack_realign_drap)
10737 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10741 /* Special care must be taken for the normal return case of a function
10742 using eh_return: the eax and edx registers are marked as saved, but
10743 not restored along this path. Adjust the save location to match. */
10744 if (crtl->calls_eh_return && style != 2)
10745 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10747 /* EH_RETURN requires the use of moves to function properly. */
10748 if (crtl->calls_eh_return)
10749 restore_regs_via_mov = true;
10750 /* SEH requires the use of pops to identify the epilogue. */
10751 else if (TARGET_SEH)
10752 restore_regs_via_mov = false;
10753 /* If we're only restoring one register and sp is not valid then
10754 use a move instruction to restore the register, since it's
10755 less work than reloading sp and popping the register. */
10756 else if (!m->fs.sp_valid && frame.nregs <= 1)
10757 restore_regs_via_mov = true;
10758 else if (TARGET_EPILOGUE_USING_MOVE
10759 && cfun->machine->use_fast_prologue_epilogue
10760 && (frame.nregs > 1
10761 || m->fs.sp_offset != frame.reg_save_offset))
10762 restore_regs_via_mov = true;
10763 else if (frame_pointer_needed
10765 && m->fs.sp_offset != frame.reg_save_offset)
10766 restore_regs_via_mov = true;
10767 else if (frame_pointer_needed
10768 && TARGET_USE_LEAVE
10769 && cfun->machine->use_fast_prologue_epilogue
10770 && frame.nregs == 1)
10771 restore_regs_via_mov = true;
10773 restore_regs_via_mov = false;
10775 if (restore_regs_via_mov || frame.nsseregs)
10777 /* Ensure that the entire register save area is addressable via
10778 the stack pointer, if we will restore via sp. */
10780 && m->fs.sp_offset > 0x7fffffff
10781 && !(m->fs.fp_valid || m->fs.drap_valid)
10782 && (frame.nsseregs + frame.nregs) != 0)
10784 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10785 GEN_INT (m->fs.sp_offset
10786 - frame.sse_reg_save_offset),
10788 m->fs.cfa_reg == stack_pointer_rtx);
10792 /* If there are any SSE registers to restore, then we have to do it
10793 via moves, since there's obviously no pop for SSE regs. */
10794 if (frame.nsseregs)
10795 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10798 if (restore_regs_via_mov)
10803 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10805 /* eh_return epilogues need %ecx added to the stack pointer. */
10808 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10810 /* Stack align doesn't work with eh_return. */
10811 gcc_assert (!stack_realign_drap);
10812 /* Neither do regparm nested functions. */
10813 gcc_assert (!ix86_static_chain_on_stack);
10815 if (frame_pointer_needed)
10817 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10818 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10819 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10821 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10822 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10824 /* Note that we use SA as a temporary CFA, as the return
10825 address is at the proper place relative to it. We
10826 pretend this happens at the FP restore insn because
10827 prior to this insn the FP would be stored at the wrong
10828 offset relative to SA, and after this insn we have no
10829 other reasonable register to use for the CFA. We don't
10830 bother resetting the CFA to the SP for the duration of
10831 the return insn. */
10832 add_reg_note (insn, REG_CFA_DEF_CFA,
10833 plus_constant (sa, UNITS_PER_WORD));
10834 ix86_add_queued_cfa_restore_notes (insn);
10835 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10836 RTX_FRAME_RELATED_P (insn) = 1;
10838 m->fs.cfa_reg = sa;
10839 m->fs.cfa_offset = UNITS_PER_WORD;
10840 m->fs.fp_valid = false;
10842 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10843 const0_rtx, style, false);
10847 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10848 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10849 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10850 ix86_add_queued_cfa_restore_notes (insn);
10852 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10853 if (m->fs.cfa_offset != UNITS_PER_WORD)
10855 m->fs.cfa_offset = UNITS_PER_WORD;
10856 add_reg_note (insn, REG_CFA_DEF_CFA,
10857 plus_constant (stack_pointer_rtx,
10859 RTX_FRAME_RELATED_P (insn) = 1;
10862 m->fs.sp_offset = UNITS_PER_WORD;
10863 m->fs.sp_valid = true;
10868 /* SEH requires that the function end with (1) a stack adjustment
10869 if necessary, (2) a sequence of pops, and (3) a return or
10870 jump instruction. Prevent insns from the function body from
10871 being scheduled into this sequence. */
10874 /* Prevent a catch region from being adjacent to the standard
10875 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
10876 several other flags that would be interesting to test are
10877 set up yet. */
10878 if (flag_non_call_exceptions)
10879 emit_insn (gen_nops (const1_rtx));
10881 emit_insn (gen_blockage ());
10884 /* The first step is to deallocate the stack frame so that we can
10885 pop the registers. Also do it on SEH targets for very large
10886 frames, as the emitted instructions aren't allowed by the ABI
10887 in epilogues. */
10888 if (!m->fs.sp_valid
10890 && (m->fs.sp_offset - frame.reg_save_offset
10891 >= SEH_MAX_FRAME_SIZE)))
10893 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10894 GEN_INT (m->fs.fp_offset
10895 - frame.reg_save_offset),
10898 else if (m->fs.sp_offset != frame.reg_save_offset)
10900 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10901 GEN_INT (m->fs.sp_offset
10902 - frame.reg_save_offset),
10904 m->fs.cfa_reg == stack_pointer_rtx);
10907 ix86_emit_restore_regs_using_pop ();
10910 /* If we used a frame pointer and haven't already got rid of it,
10911 then do so now. */
10912 if (m->fs.fp_valid)
10914 /* If the stack pointer is valid and pointing at the frame
10915 pointer store address, then we only need a pop. */
10916 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10917 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10918 /* Leave results in shorter dependency chains on CPUs that are
10919 able to grok it fast. */
10920 else if (TARGET_USE_LEAVE
10921 || optimize_function_for_size_p (cfun)
10922 || !cfun->machine->use_fast_prologue_epilogue)
10923 ix86_emit_leave ();
10926 pro_epilogue_adjust_stack (stack_pointer_rtx,
10927 hard_frame_pointer_rtx,
10928 const0_rtx, style, !using_drap);
10929 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10935 int param_ptr_offset = UNITS_PER_WORD;
10938 gcc_assert (stack_realign_drap);
10940 if (ix86_static_chain_on_stack)
10941 param_ptr_offset += UNITS_PER_WORD;
10942 if (!call_used_regs[REGNO (crtl->drap_reg)])
10943 param_ptr_offset += UNITS_PER_WORD;
10945 insn = emit_insn (gen_rtx_SET
10946 (VOIDmode, stack_pointer_rtx,
10947 gen_rtx_PLUS (Pmode,
10949 GEN_INT (-param_ptr_offset))));
10950 m->fs.cfa_reg = stack_pointer_rtx;
10951 m->fs.cfa_offset = param_ptr_offset;
10952 m->fs.sp_offset = param_ptr_offset;
10953 m->fs.realigned = false;
10955 add_reg_note (insn, REG_CFA_DEF_CFA,
10956 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10957 GEN_INT (param_ptr_offset)));
10958 RTX_FRAME_RELATED_P (insn) = 1;
10960 if (!call_used_regs[REGNO (crtl->drap_reg)])
10961 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10964 /* At this point the stack pointer must be valid, and we must have
10965 restored all of the registers. We may not have deallocated the
10966 entire stack frame. We've delayed this until now because it may
10967 be possible to merge the local stack deallocation with the
10968 deallocation forced by ix86_static_chain_on_stack. */
10969 gcc_assert (m->fs.sp_valid);
10970 gcc_assert (!m->fs.fp_valid);
10971 gcc_assert (!m->fs.realigned);
10972 if (m->fs.sp_offset != UNITS_PER_WORD)
10974 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10975 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10979 ix86_add_queued_cfa_restore_notes (get_last_insn ());
10981 /* Sibcall epilogues don't want a return instruction. */
10984 m->fs = frame_state_save;
10988 /* Emit vzeroupper if needed. */
10989 ix86_maybe_emit_epilogue_vzeroupper ();
10991 if (crtl->args.pops_args && crtl->args.size)
10993 rtx popc = GEN_INT (crtl->args.pops_args);
10995 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
10996 address, do an explicit add, and jump indirectly to the caller. */
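/* A sketch of the sequence emitted below for the >= 64K case
   (illustrative):

	popl	%ecx			; return address into %ecx
	addl	$N, %esp		; pop the N bytes of arguments
	jmp	*%ecx			; return to caller

   versus the plain "ret $N" used otherwise, whose immediate operand is
   only 16 bits wide.  */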
10998 if (crtl->args.pops_args >= 65536)
11000 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11003 /* There is no "pascal" calling convention in any 64bit ABI. */
11004 gcc_assert (!TARGET_64BIT);
11006 insn = emit_insn (gen_pop (ecx));
11007 m->fs.cfa_offset -= UNITS_PER_WORD;
11008 m->fs.sp_offset -= UNITS_PER_WORD;
11010 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11011 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11012 add_reg_note (insn, REG_CFA_REGISTER,
11013 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11014 RTX_FRAME_RELATED_P (insn) = 1;
11016 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11018 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11021 emit_jump_insn (gen_simple_return_pop_internal (popc));
11024 emit_jump_insn (gen_simple_return_internal ());
11026 /* Restore the state back to the state from the prologue,
11027 so that it's correct for the next epilogue. */
11028 m->fs = frame_state_save;
11031 /* Reset from the function's potential modifications. */
11034 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11035 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11037 if (pic_offset_table_rtx)
11038 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11040 /* Mach-O doesn't support labels at the end of objects, so if
11041 it looks like we might want one, insert a NOP. */
11043 rtx insn = get_last_insn ();
11044 rtx deleted_debug_label = NULL_RTX;
11047 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11049 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11050 notes; instead set their CODE_LABEL_NUMBER to -1.
11051 Otherwise there would be code generation differences
11052 between -g and -g0. */
11053 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11054 deleted_debug_label = insn;
11055 insn = PREV_INSN (insn);
11060 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11061 fputs ("\tnop\n", file);
11062 else if (deleted_debug_label)
11063 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11064 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11065 CODE_LABEL_NUMBER (insn) = -1;
11071 /* Return a scratch register to use in the split stack prologue. The
11072 split stack prologue is used for -fsplit-stack. It is the first
11073 instructions in the function, even before the regular prologue.
11074 The scratch register can be any caller-saved register which is not
11075 used for parameters or for the static chain. */
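/* Roughly (an illustrative summary, not an exhaustive table): with no
   register parameters, %eax, %ecx and %edx are all free; fastcall
   consumes %ecx and %edx and its static chain lives in %eax, so a
   fastcall nested function leaves nothing; three regparm parameters
   likewise leave nothing, hence the sorry () calls below.  */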
11077 static unsigned int
11078 split_stack_prologue_scratch_regno (void)
11087 is_fastcall = (lookup_attribute ("fastcall",
11088 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11090 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11094 if (DECL_STATIC_CHAIN (cfun->decl))
11096 sorry ("-fsplit-stack does not support fastcall with "
11097 "nested function");
11098 return INVALID_REGNUM;
11102 else if (regparm < 3)
11104 if (!DECL_STATIC_CHAIN (cfun->decl))
11110 sorry ("-fsplit-stack does not support 2 register "
11111 "parameters for a nested function");
11112 return INVALID_REGNUM;
11119 /* FIXME: We could make this work by pushing a register
11120 around the addition and comparison. */
11121 sorry ("-fsplit-stack does not support 3 register parameters");
11122 return INVALID_REGNUM;
11127 /* A SYMBOL_REF for the function which allocates new stack space for
11128 split stack. */
11130 static GTY(()) rtx split_stack_fn;
11132 /* A SYMBOL_REF for the more-stack function when using the large
11133 model. */
11135 static GTY(()) rtx split_stack_fn_large;
11137 /* Handle -fsplit-stack. These are the first instructions in the
11138 function, even before the regular prologue. */
11141 ix86_expand_split_stack_prologue (void)
11143 struct ix86_frame frame;
11144 HOST_WIDE_INT allocate;
11145 unsigned HOST_WIDE_INT args_size;
11146 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11147 rtx scratch_reg = NULL_RTX;
11148 rtx varargs_label = NULL_RTX;
11151 gcc_assert (flag_split_stack && reload_completed);
11153 ix86_finalize_stack_realign_flags ();
11154 ix86_compute_frame_layout (&frame);
11155 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11157 /* This is the label we will branch to if we have enough stack
11158 space. We expect the basic block reordering pass to reverse this
11159 branch if optimizing, so that we branch in the unlikely case. */
11160 label = gen_label_rtx ();
11162 /* We need to compare the stack pointer minus the frame size with
11163 the stack boundary in the TCB. The stack boundary always gives
11164 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11165 can compare directly. Otherwise we need to do an addition. */
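/* A sketch of the resulting check (illustrative; on GNU/Linux with glibc
   the boundary lives in the TCB, reached through the UNSPEC_STACK_CHECK
   address, e.g. %gs:0x30 for 32-bit and %fs:0x70 for 64-bit):

	leaq	-FRAME(%rsp), %r11
	cmpq	%fs:0x70, %r11
	jae	.Lenough_stack

   with the lea omitted when FRAME is below SPLIT_STACK_AVAILABLE.  */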
11167 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11168 UNSPEC_STACK_CHECK);
11169 limit = gen_rtx_CONST (Pmode, limit);
11170 limit = gen_rtx_MEM (Pmode, limit);
11171 if (allocate < SPLIT_STACK_AVAILABLE)
11172 current = stack_pointer_rtx;
11175 unsigned int scratch_regno;
11178 /* We need a scratch register to hold the stack pointer minus
11179 the required frame size. Since this is the very start of the
11180 function, the scratch register can be any caller-saved
11181 register which is not used for parameters. */
11182 offset = GEN_INT (- allocate);
11183 scratch_regno = split_stack_prologue_scratch_regno ();
11184 if (scratch_regno == INVALID_REGNUM)
11186 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11187 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11189 /* We don't use ix86_gen_add3 in this case because it will
11190 want to split to lea, but when not optimizing the insn
11191 will not be split after this point. */
11192 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11193 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11198 emit_move_insn (scratch_reg, offset);
11199 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11200 stack_pointer_rtx));
11202 current = scratch_reg;
11205 ix86_expand_branch (GEU, current, limit, label);
11206 jump_insn = get_last_insn ();
11207 JUMP_LABEL (jump_insn) = label;
11209 /* Mark the jump as very likely to be taken. */
11210 add_reg_note (jump_insn, REG_BR_PROB,
11211 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
11213 if (split_stack_fn == NULL_RTX)
11214 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11215 fn = split_stack_fn;
11217 /* Get more stack space. We pass in the desired stack space and the
11218 size of the arguments to copy to the new stack. In 32-bit mode
11219 we push the parameters; __morestack will return on a new stack
11220 anyhow. In 64-bit mode we pass the parameters in r10 and
11221 r11. */
11222 allocate_rtx = GEN_INT (allocate);
11223 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11224 call_fusage = NULL_RTX;
11229 reg10 = gen_rtx_REG (Pmode, R10_REG);
11230 reg11 = gen_rtx_REG (Pmode, R11_REG);
11232 /* If this function uses a static chain, it will be in %r10.
11233 Preserve it across the call to __morestack. */
11234 if (DECL_STATIC_CHAIN (cfun->decl))
11238 rax = gen_rtx_REG (Pmode, AX_REG);
11239 emit_move_insn (rax, reg10);
11240 use_reg (&call_fusage, rax);
11243 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11245 HOST_WIDE_INT argval;
11247 /* When using the large model we need to load the address
11248 into a register, and we've run out of registers. So we
11249 switch to a different calling convention, and we call a
11250 different function: __morestack_large. We pass the
11251 argument size in the upper 32 bits of r10 and pass the
11252 frame size in the lower 32 bits. */
11253 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11254 gcc_assert ((args_size & 0xffffffff) == args_size);
11256 if (split_stack_fn_large == NULL_RTX)
11257 split_stack_fn_large =
11258 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11260 if (ix86_cmodel == CM_LARGE_PIC)
11264 label = gen_label_rtx ();
11265 emit_label (label);
11266 LABEL_PRESERVE_P (label) = 1;
11267 emit_insn (gen_set_rip_rex64 (reg10, label));
11268 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11269 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11270 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11272 x = gen_rtx_CONST (Pmode, x);
11273 emit_move_insn (reg11, x);
11274 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11275 x = gen_const_mem (Pmode, x);
11276 emit_move_insn (reg11, x);
11279 emit_move_insn (reg11, split_stack_fn_large);
11283 argval = ((args_size << 16) << 16) + allocate;
11284 emit_move_insn (reg10, GEN_INT (argval));
11288 emit_move_insn (reg10, allocate_rtx);
11289 emit_move_insn (reg11, GEN_INT (args_size));
11290 use_reg (&call_fusage, reg11);
11293 use_reg (&call_fusage, reg10);
11297 emit_insn (gen_push (GEN_INT (args_size)));
11298 emit_insn (gen_push (allocate_rtx));
11300 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11301 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11303 add_function_usage_to (call_insn, call_fusage);
11305 /* In order to make call/return prediction work right, we now need
11306 to execute a return instruction. See
11307 libgcc/config/i386/morestack.S for the details on how this works.
11309 For flow purposes gcc must not see this as a return
11310 instruction--we need control flow to continue at the subsequent
11311 label. Therefore, we use an unspec. */
11312 gcc_assert (crtl->args.pops_args < 65536);
11313 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11315 /* If we are in 64-bit mode and this function uses a static chain,
11316 we saved %r10 in %rax before calling __morestack. */
11317 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11318 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11319 gen_rtx_REG (Pmode, AX_REG));
11321 /* If this function calls va_start, we need to store a pointer to
11322 the arguments on the old stack, because they may not have been
11323 all copied to the new stack. At this point the old stack can be
11324 found at the frame pointer value used by __morestack, because
11325 __morestack has set that up before calling back to us. Here we
11326 store that pointer in a scratch register, and in
11327 ix86_expand_prologue we store the scratch register in a stack
11328 slot. */
11329 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11331 unsigned int scratch_regno;
11335 scratch_regno = split_stack_prologue_scratch_regno ();
11336 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11337 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11339 /* 64-bit:
11340 fp -> old fp value
11341 return address within this function
11342 return address of caller of this function
11343 stack arguments
11344 So we add three words to get to the stack arguments.
11346 32-bit:
11347 fp -> old fp value
11348 return address within this function
11349 first argument to __morestack
11350 second argument to __morestack
11351 return address of caller of this function
11352 stack arguments
11353 So we add five words to get to the stack arguments. */
11355 words = TARGET_64BIT ? 3 : 5;
11356 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11357 gen_rtx_PLUS (Pmode, frame_reg,
11358 GEN_INT (words * UNITS_PER_WORD))));
11360 varargs_label = gen_label_rtx ();
11361 emit_jump_insn (gen_jump (varargs_label));
11362 JUMP_LABEL (get_last_insn ()) = varargs_label;
11367 emit_label (label);
11368 LABEL_NUSES (label) = 1;
11370 /* If this function calls va_start, we now have to set the scratch
11371 register for the case where we do not call __morestack. In this
11372 case we need to set it based on the stack pointer. */
11373 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11375 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11376 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11377 GEN_INT (UNITS_PER_WORD))));
11379 emit_label (varargs_label);
11380 LABEL_NUSES (varargs_label) = 1;
11384 /* We may have to tell the dataflow pass that the split stack prologue
11385 is initializing a scratch register. */
11388 ix86_live_on_entry (bitmap regs)
11390 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11392 gcc_assert (flag_split_stack);
11393 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11397 /* Determine if OP is a suitable SUBREG RTX for an address. */
11400 ix86_address_subreg_operand (rtx op)
11402 enum machine_mode mode;
11407 mode = GET_MODE (op);
11409 if (GET_MODE_CLASS (mode) != MODE_INT)
11412 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11413 failures when the register is one word out of a two word structure. */
11414 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11417 /* Allow only SUBREGs of non-eliminable hard registers. */
11418 return register_no_elim_operand (op, mode);
11421 /* Extract the parts of an RTL expression that is a valid memory address
11422 for an instruction. Return 0 if the structure of the address is
11423 grossly off. Return -1 if the address contains ASHIFT, so it is not
11424 strictly valid, but is still used for computing the length of the lea instruction. */
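/* For example, the canonical RTL for the 32-bit address "12(%ebx,%esi,4)"
   (an illustrative case, not a dump from this pass),

     (plus (plus (mult (reg %esi) (const_int 4))
                 (reg %ebx))
           (const_int 12))

   decomposes into out->base = %ebx, out->index = %esi, out->scale = 4
   and out->disp = (const_int 12).  */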
11427 ix86_decompose_address (rtx addr, struct ix86_address *out)
11429 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11430 rtx base_reg, index_reg;
11431 HOST_WIDE_INT scale = 1;
11432 rtx scale_rtx = NULL_RTX;
11435 enum ix86_address_seg seg = SEG_DEFAULT;
11437 /* Allow zero-extended SImode addresses,
11438 they will be emitted with addr32 prefix. */
11439 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11441 if (GET_CODE (addr) == ZERO_EXTEND
11442 && GET_MODE (XEXP (addr, 0)) == SImode)
11444 addr = XEXP (addr, 0);
11445 if (CONST_INT_P (addr))
11448 else if (GET_CODE (addr) == AND
11449 && const_32bit_mask (XEXP (addr, 1), DImode))
11451 addr = XEXP (addr, 0);
11453 /* Adjust SUBREGs. */
11454 if (GET_CODE (addr) == SUBREG
11455 && GET_MODE (SUBREG_REG (addr)) == SImode)
11457 addr = SUBREG_REG (addr);
11458 if (CONST_INT_P (addr))
11461 else if (GET_MODE (addr) == DImode)
11462 addr = gen_rtx_SUBREG (SImode, addr, 0);
11463 else if (GET_MODE (addr) != VOIDmode)
11468 /* Allow SImode subregs of DImode addresses,
11469 they will be emitted with addr32 prefix. */
11470 if (TARGET_64BIT && GET_MODE (addr) == SImode)
11472 if (GET_CODE (addr) == SUBREG
11473 && GET_MODE (SUBREG_REG (addr)) == DImode)
11475 addr = SUBREG_REG (addr);
11476 if (CONST_INT_P (addr))
11483 else if (GET_CODE (addr) == SUBREG)
11485 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11490 else if (GET_CODE (addr) == PLUS)
11492 rtx addends[4], op;
11500 addends[n++] = XEXP (op, 1);
11503 while (GET_CODE (op) == PLUS);
11508 for (i = n; i >= 0; --i)
11511 switch (GET_CODE (op))
11516 index = XEXP (op, 0);
11517 scale_rtx = XEXP (op, 1);
11523 index = XEXP (op, 0);
11524 tmp = XEXP (op, 1);
11525 if (!CONST_INT_P (tmp))
11527 scale = INTVAL (tmp);
11528 if ((unsigned HOST_WIDE_INT) scale > 3)
11530 scale = 1 << scale;
11534 if (XINT (op, 1) == UNSPEC_TP
11535 && TARGET_TLS_DIRECT_SEG_REFS
11536 && seg == SEG_DEFAULT)
11537 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11543 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11570 else if (GET_CODE (addr) == MULT)
11572 index = XEXP (addr, 0); /* index*scale */
11573 scale_rtx = XEXP (addr, 1);
11575 else if (GET_CODE (addr) == ASHIFT)
11577 /* We're called for lea too, which implements ashift on occasion. */
11578 index = XEXP (addr, 0);
11579 tmp = XEXP (addr, 1);
11580 if (!CONST_INT_P (tmp))
11582 scale = INTVAL (tmp);
11583 if ((unsigned HOST_WIDE_INT) scale > 3)
11585 scale = 1 << scale;
11588 else if (CONST_INT_P (addr))
11590 if (!x86_64_immediate_operand (addr, VOIDmode))
11593 /* Constant addresses are sign-extended to 64 bits; we have to
11594 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11596 && val_signbit_known_set_p (SImode, INTVAL (addr)))
11602 disp = addr; /* displacement */
11608 else if (GET_CODE (index) == SUBREG
11609 && ix86_address_subreg_operand (SUBREG_REG (index)))
11615 /* Extract the integral value of scale. */
11618 if (!CONST_INT_P (scale_rtx))
11620 scale = INTVAL (scale_rtx);
11623 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11624 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11626 /* Avoid useless 0 displacement. */
11627 if (disp == const0_rtx && (base || index))
11630 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11631 if (base_reg && index_reg && scale == 1
11632 && (index_reg == arg_pointer_rtx
11633 || index_reg == frame_pointer_rtx
11634 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11637 tmp = base, base = index, index = tmp;
11638 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11641 /* Special case: %ebp cannot be encoded as a base without a displacement.
11642 Similarly %r13. */
11643 if (!disp
11644 && base_reg
11645 && (base_reg == hard_frame_pointer_rtx
11646 || base_reg == frame_pointer_rtx
11647 || base_reg == arg_pointer_rtx
11648 || (REG_P (base_reg)
11649 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11650 || REGNO (base_reg) == R13_REG))))
11653 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11654 Avoid this by transforming to [%esi+0].
11655 Reload calls address legitimization without cfun defined, so we need
11656 to test cfun for being non-NULL. */
11657 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11658 && base_reg && !index_reg && !disp
11659 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11662 /* Special case: encode reg+reg instead of reg*2. */
11663 if (!base && index && scale == 2)
11664 base = index, base_reg = index_reg, scale = 1;
11666 /* Special case: scaling cannot be encoded without base or displacement. */
11667 if (!base && !disp && index && scale != 1)
11671 out->index = index;
11673 out->scale = scale;
11679 /* Return cost of the memory address x.
11680 For i386, it is better to use a complex address than let gcc copy
11681 the address into a reg and make a new pseudo. But not if the address
11682 requires two regs - that would mean more pseudos with longer
11683 lifetimes. */
11685 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11687 struct ix86_address parts;
11689 int ok = ix86_decompose_address (x, &parts);
11693 if (parts.base && GET_CODE (parts.base) == SUBREG)
11694 parts.base = SUBREG_REG (parts.base);
11695 if (parts.index && GET_CODE (parts.index) == SUBREG)
11696 parts.index = SUBREG_REG (parts.index);
11698 /* Attempt to minimize number of registers in the address. */
11700 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11702 && (!REG_P (parts.index)
11703 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11707 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11709 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11710 && parts.base != parts.index)
11713 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11714 since its predecode logic can't detect the length of instructions
11715 and it degenerates to vector decoding. Increase the cost of such
11716 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11717 to split such addresses or even refuse such addresses at all.
11719 The following addressing modes are affected:
11720 [base+scale*index]
11721 [scale*index+disp]
11722 [base+index]
11724 The first and last cases may be avoidable by explicitly coding the zero
11725 into the memory address, but I don't have an AMD-K6 machine handy to
11726 check this theory. */
11729 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11730 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11731 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11737 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
11738 this is used to form addresses to local data when -fPIC is in
11739 effect. */
11742 darwin_local_data_pic (rtx disp)
11744 return (GET_CODE (disp) == UNSPEC
11745 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11748 /* Determine if a given RTX is a valid constant. We already know this
11749 satisfies CONSTANT_P. */
11752 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11754 switch (GET_CODE (x))
11759 if (GET_CODE (x) == PLUS)
11761 if (!CONST_INT_P (XEXP (x, 1)))
11766 if (TARGET_MACHO && darwin_local_data_pic (x))
11769 /* Only some unspecs are valid as "constants". */
11770 if (GET_CODE (x) == UNSPEC)
11771 switch (XINT (x, 1))
11774 case UNSPEC_GOTOFF:
11775 case UNSPEC_PLTOFF:
11776 return TARGET_64BIT;
11778 case UNSPEC_NTPOFF:
11779 x = XVECEXP (x, 0, 0);
11780 return (GET_CODE (x) == SYMBOL_REF
11781 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11782 case UNSPEC_DTPOFF:
11783 x = XVECEXP (x, 0, 0);
11784 return (GET_CODE (x) == SYMBOL_REF
11785 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11790 /* We must have drilled down to a symbol. */
11791 if (GET_CODE (x) == LABEL_REF)
11793 if (GET_CODE (x) != SYMBOL_REF)
11798 /* TLS symbols are never valid. */
11799 if (SYMBOL_REF_TLS_MODEL (x))
11802 /* DLLIMPORT symbols are never valid. */
11803 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11804 && SYMBOL_REF_DLLIMPORT_P (x))
11808 /* mdynamic-no-pic */
11809 if (MACHO_DYNAMIC_NO_PIC_P)
11810 return machopic_symbol_defined_p (x);
11815 if (GET_MODE (x) == TImode
11816 && x != CONST0_RTX (TImode)
11822 if (!standard_sse_constant_p (x))
11829 /* Otherwise we handle everything else in the move patterns. */
11833 /* Determine if it's legal to put X into the constant pool. This
11834 is not possible for the address of thread-local symbols, which
11835 is checked above. */
11838 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11840 /* We can always put integral constants and vectors in memory. */
11841 switch (GET_CODE (x))
11851 return !ix86_legitimate_constant_p (mode, x);
11855 /* Nonzero if the constant value X is a legitimate general operand
11856 when generating PIC code. It is given that flag_pic is on and
11857 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11860 legitimate_pic_operand_p (rtx x)
11864 switch (GET_CODE (x))
11867 inner = XEXP (x, 0);
11868 if (GET_CODE (inner) == PLUS
11869 && CONST_INT_P (XEXP (inner, 1)))
11870 inner = XEXP (inner, 0);
11872 /* Only some unspecs are valid as "constants". */
11873 if (GET_CODE (inner) == UNSPEC)
11874 switch (XINT (inner, 1))
11877 case UNSPEC_GOTOFF:
11878 case UNSPEC_PLTOFF:
11879 return TARGET_64BIT;
11881 x = XVECEXP (inner, 0, 0);
11882 return (GET_CODE (x) == SYMBOL_REF
11883 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11884 case UNSPEC_MACHOPIC_OFFSET:
11885 return legitimate_pic_address_disp_p (x);
11893 return legitimate_pic_address_disp_p (x);
11900 /* Determine if a given CONST RTX is a valid memory displacement
11901 in PIC mode. */
11904 legitimate_pic_address_disp_p (rtx disp)
11908 /* In 64bit mode we can allow direct addresses of symbols and labels
11909 when they are not dynamic symbols. */
11912 rtx op0 = disp, op1;
11914 switch (GET_CODE (disp))
11920 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11922 op0 = XEXP (XEXP (disp, 0), 0);
11923 op1 = XEXP (XEXP (disp, 0), 1);
11924 if (!CONST_INT_P (op1)
11925 || INTVAL (op1) >= 16*1024*1024
11926 || INTVAL (op1) < -16*1024*1024)
11928 if (GET_CODE (op0) == LABEL_REF)
11930 if (GET_CODE (op0) == CONST
11931 && GET_CODE (XEXP (op0, 0)) == UNSPEC
11932 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
11934 if (GET_CODE (op0) == UNSPEC
11935 && XINT (op0, 1) == UNSPEC_PCREL)
11937 if (GET_CODE (op0) != SYMBOL_REF)
11942 /* TLS references should always be enclosed in UNSPEC. */
11943 if (SYMBOL_REF_TLS_MODEL (op0))
11945 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11946 && ix86_cmodel != CM_LARGE_PIC)
11954 if (GET_CODE (disp) != CONST)
11956 disp = XEXP (disp, 0);
11960 /* It is unsafe to allow PLUS expressions here; this limits the
11961 allowed distance of GOT table references. We should not need these anyway. */
11962 if (GET_CODE (disp) != UNSPEC
11963 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11964 && XINT (disp, 1) != UNSPEC_GOTOFF
11965 && XINT (disp, 1) != UNSPEC_PCREL
11966 && XINT (disp, 1) != UNSPEC_PLTOFF))
11969 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11970 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11976 if (GET_CODE (disp) == PLUS)
11978 if (!CONST_INT_P (XEXP (disp, 1)))
11980 disp = XEXP (disp, 0);
11984 if (TARGET_MACHO && darwin_local_data_pic (disp))
11987 if (GET_CODE (disp) != UNSPEC)
11990 switch (XINT (disp, 1))
11995 /* We need to check for both symbols and labels because VxWorks loads
11996 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11997 details. */
11998 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11999 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12000 case UNSPEC_GOTOFF:
12001 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12002 While the ABI also specifies a 32bit relocation, we don't produce
12003 it in the small PIC model at all. */
12004 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12005 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12007 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12009 case UNSPEC_GOTTPOFF:
12010 case UNSPEC_GOTNTPOFF:
12011 case UNSPEC_INDNTPOFF:
12014 disp = XVECEXP (disp, 0, 0);
12015 return (GET_CODE (disp) == SYMBOL_REF
12016 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12017 case UNSPEC_NTPOFF:
12018 disp = XVECEXP (disp, 0, 0);
12019 return (GET_CODE (disp) == SYMBOL_REF
12020 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12021 case UNSPEC_DTPOFF:
12022 disp = XVECEXP (disp, 0, 0);
12023 return (GET_CODE (disp) == SYMBOL_REF
12024 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12030 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12031 replace the input X, or the original X if no replacement is called for.
12032 The output parameter *WIN is 1 if the calling macro should goto WIN,
12033 0 if it should not. */
12036 ix86_legitimize_reload_address (rtx x,
12037 enum machine_mode mode ATTRIBUTE_UNUSED,
12038 int opnum, int type,
12039 int ind_levels ATTRIBUTE_UNUSED)
12041 /* Reload can generate:
12043 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12047 This RTX is rejected by ix86_legitimate_address_p due to
12048 non-strictness of base register 97. Following this rejection,
12049 reload pushes all three components into separate registers,
12050 creating an invalid memory address RTX.
12052 The following code reloads only the invalid part of the
12053 memory address RTX. */
12055 if (GET_CODE (x) == PLUS
12056 && REG_P (XEXP (x, 1))
12057 && GET_CODE (XEXP (x, 0)) == PLUS
12058 && REG_P (XEXP (XEXP (x, 0), 1)))
12061 bool something_reloaded = false;
12063 base = XEXP (XEXP (x, 0), 1);
12064 if (!REG_OK_FOR_BASE_STRICT_P (base))
12066 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12067 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12068 opnum, (enum reload_type)type);
12069 something_reloaded = true;
12072 index = XEXP (x, 1);
12073 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12075 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12076 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12077 opnum, (enum reload_type)type);
12078 something_reloaded = true;
12081 gcc_assert (something_reloaded);
12088 /* Recognizes RTL expressions that are valid memory addresses for an
12089 instruction. The MODE argument is the machine mode for the MEM
12090 expression that wants to use this address.
12092 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12093 convert common non-canonical forms to canonical form so that they will
12094 be recognized. */
12097 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12098 rtx addr, bool strict)
12100 struct ix86_address parts;
12101 rtx base, index, disp;
12102 HOST_WIDE_INT scale;
12104 if (ix86_decompose_address (addr, &parts) <= 0)
12105 /* Decomposition failed. */
12109 index = parts.index;
12111 scale = parts.scale;
12113 /* Validate base register. */
12120 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
12121 reg = SUBREG_REG (base);
12123 /* Base is not a register. */
12126 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
12129 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12130 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12131 /* Base is not valid. */
12135 /* Validate index register. */
12142 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
12143 reg = SUBREG_REG (index);
12145 /* Index is not a register. */
12148 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
12151 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12152 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12153 /* Index is not valid. */
12157 /* Index and base should have the same mode. */
12159 && GET_MODE (base) != GET_MODE (index))
12162 /* Validate scale factor. */
12166 /* Scale without index. */
12169 if (scale != 2 && scale != 4 && scale != 8)
12170 /* Scale is not a valid multiplier. */
12174 /* Validate displacement. */
12177 if (GET_CODE (disp) == CONST
12178 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12179 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12180 switch (XINT (XEXP (disp, 0), 1))
12182 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12183 used. While the ABI also specifies 32bit relocations, we don't
12184 produce them at all and use IP-relative addressing instead. */
12186 case UNSPEC_GOTOFF:
12187 gcc_assert (flag_pic);
12189 goto is_legitimate_pic;
12191 /* 64bit address unspec. */
12194 case UNSPEC_GOTPCREL:
12196 gcc_assert (flag_pic);
12197 goto is_legitimate_pic;
12199 case UNSPEC_GOTTPOFF:
12200 case UNSPEC_GOTNTPOFF:
12201 case UNSPEC_INDNTPOFF:
12202 case UNSPEC_NTPOFF:
12203 case UNSPEC_DTPOFF:
12206 case UNSPEC_STACK_CHECK:
12207 gcc_assert (flag_split_stack);
12211 /* Invalid address unspec. */
12215 else if (SYMBOLIC_CONST (disp)
12219 && MACHOPIC_INDIRECT
12220 && !machopic_operand_p (disp)
12226 if (TARGET_64BIT && (index || base))
12228 /* foo@dtpoff(%rX) is ok. */
12229 if (GET_CODE (disp) != CONST
12230 || GET_CODE (XEXP (disp, 0)) != PLUS
12231 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12232 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12233 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12234 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12235 /* Non-constant pic memory reference. */
12238 else if ((!TARGET_MACHO || flag_pic)
12239 && ! legitimate_pic_address_disp_p (disp))
12240 /* Displacement is an invalid pic construct. */
12243 else if (MACHO_DYNAMIC_NO_PIC_P
12244 && !ix86_legitimate_constant_p (Pmode, disp))
12245 /* Displacement must be referenced via non_lazy_pointer. */
12249 /* This code used to verify that a symbolic pic displacement
12250 includes the pic_offset_table_rtx register.
12252 While this is a good idea, unfortunately these constructs may
12253 be created by the "adds using lea" optimization for incorrect
code that adds a variable offset to the address of a global object.
12262 Such code is nonsensical, but results in addressing the
12263 GOT table with a pic_offset_table_rtx base.  We can't
12264 just refuse it easily, since it gets matched by the
12265 "addsi3" pattern, which later gets split to an lea when the
12266 output register differs from the input.  While this
12267 could be handled by a separate addsi pattern for this case
12268 that never results in lea, disabling this test seems to be the
12269 easier and correct fix for the crash. */
12271 else if (GET_CODE (disp) != LABEL_REF
12272 && !CONST_INT_P (disp)
12273 && (GET_CODE (disp) != CONST
12274 || !ix86_legitimate_constant_p (Pmode, disp))
12275 && (GET_CODE (disp) != SYMBOL_REF
12276 || !ix86_legitimate_constant_p (Pmode, disp)))
12277 /* Displacement is not constant. */
12279 else if (TARGET_64BIT
12280 && !x86_64_immediate_operand (disp, VOIDmode))
12281 /* Displacement is out of range. */
12285 /* Everything looks valid. */
12289 /* Determine if a given RTX is a valid constant address. */
12292 constant_address_p (rtx x)
12294 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12297 /* Return a unique alias set for the GOT. */
12299 static alias_set_type
12300 ix86_GOT_alias_set (void)
12302 static alias_set_type set = -1;
12304 set = new_alias_set ();
12308 /* Return a legitimate reference for ORIG (an address) using the
12309 register REG. If REG is 0, a new pseudo is generated.
12311 There are two types of references that must be handled:
12313 1. Global data references must load the address from the GOT, via
12314 the PIC reg. An insn is emitted to do this load, and the reg is
12317 2. Static data references, constant pool addresses, and code labels
12318 compute the address as an offset from the GOT, whose base is in
12319 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12320 differentiate them from global data objects. The returned
12321 address is the PIC reg + an unspec constant.
12323 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12324 reg also appears in the address. */
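/* A sketch of the two cases (illustrative; 32-bit SysV PIC assumed,
   with %ebx holding the PIC register):

     movl  foo@GOT(%ebx), %eax     # global: load the address from the GOT
     leal  bar@GOTOFF(%ebx), %eax  # static: offset from the GOT base  */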
12327 legitimize_pic_address (rtx orig, rtx reg)
12330 rtx new_rtx = orig;
12334 if (TARGET_MACHO && !TARGET_64BIT)
12337 reg = gen_reg_rtx (Pmode);
12338 /* Use the generic Mach-O PIC machinery. */
12339 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12343 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12345 else if (TARGET_64BIT
12346 && ix86_cmodel != CM_SMALL_PIC
12347 && gotoff_operand (addr, Pmode))
12350 /* This symbol may be referenced via a displacement from the PIC
12351 base address (@GOTOFF). */
12353 if (reload_in_progress)
12354 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12355 if (GET_CODE (addr) == CONST)
12356 addr = XEXP (addr, 0);
12357 if (GET_CODE (addr) == PLUS)
12359 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12361 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12364 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12365 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12367 tmpreg = gen_reg_rtx (Pmode);
12370 emit_move_insn (tmpreg, new_rtx);
12374 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12375 tmpreg, 1, OPTAB_DIRECT);
12378 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12380 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12382 /* This symbol may be referenced via a displacement from the PIC
12383 base address (@GOTOFF). */
12385 if (reload_in_progress)
12386 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12387 if (GET_CODE (addr) == CONST)
12388 addr = XEXP (addr, 0);
12389 if (GET_CODE (addr) == PLUS)
12391 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12393 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12396 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12397 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12398 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12402 emit_move_insn (reg, new_rtx);
12406 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12407 /* We can't use @GOTOFF for text labels on VxWorks;
12408 see gotoff_operand. */
12409 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12411 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12413 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12414 return legitimize_dllimport_symbol (addr, true);
12415 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12416 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12417 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12419 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12420 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12424 /* For x64 PE-COFF there is no GOT table, so we use the address directly. */
12426 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12428 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12429 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12432 reg = gen_reg_rtx (Pmode);
12433 emit_move_insn (reg, new_rtx);
12436 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12438 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12439 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12440 new_rtx = gen_const_mem (Pmode, new_rtx);
12441 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12444 reg = gen_reg_rtx (Pmode);
12445 /* Use gen_movsi directly; otherwise the address is loaded
12446 into a register for CSE. We don't want to CSE this address;
12447 instead we CSE addresses from the GOT table, so skip this. */
12448 emit_insn (gen_movsi (reg, new_rtx));
12453 /* This symbol must be referenced via a load from the
12454 Global Offset Table (@GOT). */
12456 if (reload_in_progress)
12457 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12458 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12459 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12461 new_rtx = force_reg (Pmode, new_rtx);
12462 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12463 new_rtx = gen_const_mem (Pmode, new_rtx);
12464 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12467 reg = gen_reg_rtx (Pmode);
12468 emit_move_insn (reg, new_rtx);
12474 if (CONST_INT_P (addr)
12475 && !x86_64_immediate_operand (addr, VOIDmode))
12479 emit_move_insn (reg, addr);
12483 new_rtx = force_reg (Pmode, addr);
12485 else if (GET_CODE (addr) == CONST)
12487 addr = XEXP (addr, 0);
12489 /* We must match what we generated earlier.  Assume the only
12490 unspecs that can get here are ours; not that we could do
12491 anything with them anyway. */
12492 if (GET_CODE (addr) == UNSPEC
12493 || (GET_CODE (addr) == PLUS
12494 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12496 gcc_assert (GET_CODE (addr) == PLUS);
12498 if (GET_CODE (addr) == PLUS)
12500 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12502 /* Check first to see if this is a constant offset from a @GOTOFF
12503 symbol reference. */
12504 if (gotoff_operand (op0, Pmode)
12505 && CONST_INT_P (op1))
12509 if (reload_in_progress)
12510 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12511 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12513 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12514 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12515 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12519 emit_move_insn (reg, new_rtx);
12525 if (INTVAL (op1) < -16*1024*1024
12526 || INTVAL (op1) >= 16*1024*1024)
12528 if (!x86_64_immediate_operand (op1, Pmode))
12529 op1 = force_reg (Pmode, op1);
12530 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12536 base = legitimize_pic_address (XEXP (addr, 0), reg);
12537 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12538 base == reg ? NULL_RTX : reg);
12540 if (CONST_INT_P (new_rtx))
12541 new_rtx = plus_constant (base, INTVAL (new_rtx));
12544 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12546 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12547 new_rtx = XEXP (new_rtx, 1);
12549 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12557 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12560 get_thread_pointer (bool to_reg)
12562 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12564 if (GET_MODE (tp) != Pmode)
12565 tp = convert_to_mode (Pmode, tp, 1);
12568 tp = copy_addr_to_reg (tp);
12573 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12575 static GTY(()) rtx ix86_tls_symbol;
12578 ix86_tls_get_addr (void)
12580 if (!ix86_tls_symbol)
12583 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12584 ? "___tls_get_addr" : "__tls_get_addr");
12586 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12589 return ix86_tls_symbol;
12592 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12594 static GTY(()) rtx ix86_tls_module_base_symbol;
12597 ix86_tls_module_base (void)
12599 if (!ix86_tls_module_base_symbol)
12601 ix86_tls_module_base_symbol
12602 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12604 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12605 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12608 return ix86_tls_module_base_symbol;
12611 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12612 false if we expect this to be used for a memory address and true if
12613 we expect to load the address into a register. */
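/* For orientation (illustrative, assuming IA-32 GNU TLS): the
   local-exec model expands to roughly

     movl  %gs:0, %eax
     leal  x@ntpoff(%eax), %eax

   i.e. the thread pointer plus a link-time constant offset, while the
   dynamic models emit a call to the tls_get_addr function instead.  */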
12616 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12618 rtx dest, base, off;
12619 rtx pic = NULL_RTX, tp = NULL_RTX;
12624 case TLS_MODEL_GLOBAL_DYNAMIC:
12625 dest = gen_reg_rtx (Pmode);
12630 pic = pic_offset_table_rtx;
12633 pic = gen_reg_rtx (Pmode);
12634 emit_insn (gen_set_got (pic));
12638 if (TARGET_GNU2_TLS)
12641 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12643 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12645 tp = get_thread_pointer (true);
12646 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12648 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12652 rtx caddr = ix86_tls_get_addr ();
12656 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12659 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12660 insns = get_insns ();
12663 RTL_CONST_CALL_P (insns) = 1;
12664 emit_libcall_block (insns, dest, rax, x);
12667 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12671 case TLS_MODEL_LOCAL_DYNAMIC:
12672 base = gen_reg_rtx (Pmode);
12677 pic = pic_offset_table_rtx;
12680 pic = gen_reg_rtx (Pmode);
12681 emit_insn (gen_set_got (pic));
12685 if (TARGET_GNU2_TLS)
12687 rtx tmp = ix86_tls_module_base ();
12690 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12692 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12694 tp = get_thread_pointer (true);
12695 set_unique_reg_note (get_last_insn (), REG_EQUAL,
12696 gen_rtx_MINUS (Pmode, tmp, tp));
12700 rtx caddr = ix86_tls_get_addr ();
12704 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12707 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12708 insns = get_insns ();
12711 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12712 share the LD_BASE result with other LD model accesses. */
12713 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12714 UNSPEC_TLS_LD_BASE);
12716 RTL_CONST_CALL_P (insns) = 1;
12717 emit_libcall_block (insns, base, rax, eqv);
12720 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12723 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12724 off = gen_rtx_CONST (Pmode, off);
12726 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12728 if (TARGET_GNU2_TLS)
12730 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12732 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12736 case TLS_MODEL_INITIAL_EXEC:
12739 if (TARGET_SUN_TLS)
12741 /* The Sun linker took the AMD64 TLS spec literally
12742 and can only handle %rax as destination of the
12743 initial executable code sequence. */
12745 dest = gen_reg_rtx (Pmode);
12746 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12751 type = UNSPEC_GOTNTPOFF;
12755 if (reload_in_progress)
12756 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12757 pic = pic_offset_table_rtx;
12758 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12760 else if (!TARGET_ANY_GNU_TLS)
12762 pic = gen_reg_rtx (Pmode);
12763 emit_insn (gen_set_got (pic));
12764 type = UNSPEC_GOTTPOFF;
12769 type = UNSPEC_INDNTPOFF;
12772 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12773 off = gen_rtx_CONST (Pmode, off);
12775 off = gen_rtx_PLUS (Pmode, pic, off);
12776 off = gen_const_mem (Pmode, off);
12777 set_mem_alias_set (off, ix86_GOT_alias_set ());
12779 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12781 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12782 off = force_reg (Pmode, off);
12783 return gen_rtx_PLUS (Pmode, base, off);
12787 base = get_thread_pointer (true);
12788 dest = gen_reg_rtx (Pmode);
12789 emit_insn (gen_subsi3 (dest, base, off));
12793 case TLS_MODEL_LOCAL_EXEC:
12794 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12795 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12796 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12797 off = gen_rtx_CONST (Pmode, off);
12799 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12801 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12802 return gen_rtx_PLUS (Pmode, base, off);
12806 base = get_thread_pointer (true);
12807 dest = gen_reg_rtx (Pmode);
12808 emit_insn (gen_subsi3 (dest, base, off));
12813 gcc_unreachable ();
12819 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12822 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12823 htab_t dllimport_map;
12826 get_dllimport_decl (tree decl)
12828 struct tree_map *h, in;
12831 const char *prefix;
12832 size_t namelen, prefixlen;
12837 if (!dllimport_map)
12838 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12840 in.hash = htab_hash_pointer (decl);
12841 in.base.from = decl;
12842 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12843 h = (struct tree_map *) *loc;
12847 *loc = h = ggc_alloc_tree_map ();
12849 h->base.from = decl;
12850 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12851 VAR_DECL, NULL, ptr_type_node);
12852 DECL_ARTIFICIAL (to) = 1;
12853 DECL_IGNORED_P (to) = 1;
12854 DECL_EXTERNAL (to) = 1;
12855 TREE_READONLY (to) = 1;
12857 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12858 name = targetm.strip_name_encoding (name);
12859 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12860 ? "*__imp_" : "*__imp__";
12861 namelen = strlen (name);
12862 prefixlen = strlen (prefix);
12863 imp_name = (char *) alloca (namelen + prefixlen + 1);
12864 memcpy (imp_name, prefix, prefixlen);
12865 memcpy (imp_name + prefixlen, name, namelen + 1);
12867 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12868 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12869 SET_SYMBOL_REF_DECL (rtl, to);
12870 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12872 rtl = gen_const_mem (Pmode, rtl);
12873 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12875 SET_DECL_RTL (to, rtl);
12876 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12881 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12882 true if we require the result be a register. */
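/* Illustrative example (hypothetical symbol): for a dllimport'ed
   "foo" on 32-bit Windows, get_dllimport_decl builds a reference to
   the pointer slot __imp__foo that the PE loader fills in, so
   legitimize_dllimport_symbol replaces the SYMBOL_REF with a load
   through that slot.  */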
12885 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12890 gcc_assert (SYMBOL_REF_DECL (symbol));
12891 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12893 x = DECL_RTL (imp_decl);
12895 x = force_reg (Pmode, x);
12899 /* Try machine-dependent ways of modifying an illegitimate address
12900 to be legitimate. If we find one, return the new, valid address.
12901 This macro is used in only one place: `memory_address' in explow.c.
12903 OLDX is the address as it was before break_out_memory_refs was called.
12904 In some cases it is useful to look at this to decide what needs to be done.
12906 It is always safe for this macro to do nothing. It exists to recognize
12907 opportunities to optimize the output.
12909 For the 80386, we handle X+REG by loading X into a register R and
12910 using R+REG. R will go in a general reg and indexing will be used.
12911 However, if REG is a broken-out memory address or multiplication,
12912 nothing needs to be done because REG can certainly go in a general reg.
12914 When -fpic is used, special handling is needed for symbolic references.
12915 See comments by legitimize_pic_address in i386.c for details. */
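/* Minimal sketch (illustrative): given the invalid address
   (plus (symbol_ref "x") (reg)), the symbol is forced into a fresh
   register R below, and the address becomes (plus R (reg)), which the
   hardware can express as a base-plus-index operand.  */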
12918 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12919 enum machine_mode mode)
12924 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12926 return legitimize_tls_address (x, (enum tls_model) log, false);
12927 if (GET_CODE (x) == CONST
12928 && GET_CODE (XEXP (x, 0)) == PLUS
12929 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12930 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12932 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12933 (enum tls_model) log, false);
12934 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12937 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12939 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12940 return legitimize_dllimport_symbol (x, true);
12941 if (GET_CODE (x) == CONST
12942 && GET_CODE (XEXP (x, 0)) == PLUS
12943 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12944 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12946 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12947 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12951 if (flag_pic && SYMBOLIC_CONST (x))
12952 return legitimize_pic_address (x, 0);
12955 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12956 return machopic_indirect_data_reference (x, 0);
12959 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12960 if (GET_CODE (x) == ASHIFT
12961 && CONST_INT_P (XEXP (x, 1))
12962 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12965 log = INTVAL (XEXP (x, 1));
12966 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12967 GEN_INT (1 << log));
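      /* Illustrative example: (ashift (reg) (const_int 3)) becomes
	 (mult (reg) (const_int 8)), which maps directly onto the
	 scaled-index part of an address.  */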
12970 if (GET_CODE (x) == PLUS)
12972 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12974 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12975 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12976 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12979 log = INTVAL (XEXP (XEXP (x, 0), 1));
12980 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12981 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12982 GEN_INT (1 << log));
12985 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12986 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12987 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12990 log = INTVAL (XEXP (XEXP (x, 1), 1));
12991 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12992 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12993 GEN_INT (1 << log));
12996 /* Put multiply first if it isn't already. */
12997 if (GET_CODE (XEXP (x, 1)) == MULT)
12999 rtx tmp = XEXP (x, 0);
13000 XEXP (x, 0) = XEXP (x, 1);
13005 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13006 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13007 created by virtual register instantiation, register elimination, and
13008 similar optimizations. */
13009 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13012 x = gen_rtx_PLUS (Pmode,
13013 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13014 XEXP (XEXP (x, 1), 0)),
13015 XEXP (XEXP (x, 1), 1));
13019 /* Canonicalize (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13020 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13021 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13022 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13023 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13024 && CONSTANT_P (XEXP (x, 1)))
13027 rtx other = NULL_RTX;
13029 if (CONST_INT_P (XEXP (x, 1)))
13031 constant = XEXP (x, 1);
13032 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13034 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13036 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13037 other = XEXP (x, 1);
13045 x = gen_rtx_PLUS (Pmode,
13046 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13047 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13048 plus_constant (other, INTVAL (constant)));
13052 if (changed && ix86_legitimate_address_p (mode, x, false))
13055 if (GET_CODE (XEXP (x, 0)) == MULT)
13058 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13061 if (GET_CODE (XEXP (x, 1)) == MULT)
13064 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13068 && REG_P (XEXP (x, 1))
13069 && REG_P (XEXP (x, 0)))
13072 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13075 x = legitimize_pic_address (x, 0);
13078 if (changed && ix86_legitimate_address_p (mode, x, false))
13081 if (REG_P (XEXP (x, 0)))
13083 rtx temp = gen_reg_rtx (Pmode);
13084 rtx val = force_operand (XEXP (x, 1), temp);
13087 if (GET_MODE (val) != Pmode)
13088 val = convert_to_mode (Pmode, val, 1);
13089 emit_move_insn (temp, val);
13092 XEXP (x, 1) = temp;
13096 else if (REG_P (XEXP (x, 1)))
13098 rtx temp = gen_reg_rtx (Pmode);
13099 rtx val = force_operand (XEXP (x, 0), temp);
13102 if (GET_MODE (val) != Pmode)
13103 val = convert_to_mode (Pmode, val, 1);
13104 emit_move_insn (temp, val);
13107 XEXP (x, 0) = temp;
13115 /* Print an integer constant expression in assembler syntax. Addition
13116 and subtraction are the only arithmetic operations that may appear in these
13117 expressions. FILE is the stdio stream to write to, X is the rtx, and
13118 CODE is the operand print code from the output string. */
13121 output_pic_addr_const (FILE *file, rtx x, int code)
13125 switch (GET_CODE (x))
13128 gcc_assert (flag_pic);
13133 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13134 output_addr_const (file, x);
13137 const char *name = XSTR (x, 0);
13139 /* Mark the decl as referenced so that cgraph will
13140 output the function. */
13141 if (SYMBOL_REF_DECL (x))
13142 mark_decl_referenced (SYMBOL_REF_DECL (x));
13145 if (MACHOPIC_INDIRECT
13146 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13147 name = machopic_indirection_name (x, /*stub_p=*/true);
13149 assemble_name (file, name);
13151 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13152 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13153 fputs ("@PLT", file);
13160 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13161 assemble_name (asm_out_file, buf);
13165 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13169 /* This used to output parentheses around the expression,
13170 but that does not work on the 386 (either ATT or BSD assembler). */
13171 output_pic_addr_const (file, XEXP (x, 0), code);
13175 if (GET_MODE (x) == VOIDmode)
13177 /* We can use %d if the number is <32 bits and positive. */
13178 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13179 fprintf (file, "0x%lx%08lx",
13180 (unsigned long) CONST_DOUBLE_HIGH (x),
13181 (unsigned long) CONST_DOUBLE_LOW (x));
13183 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13186 /* We can't handle floating point constants;
13187 TARGET_PRINT_OPERAND must handle them. */
13188 output_operand_lossage ("floating constant misused");
13192 /* Some assemblers need integer constants to appear first. */
13193 if (CONST_INT_P (XEXP (x, 0)))
13195 output_pic_addr_const (file, XEXP (x, 0), code);
13197 output_pic_addr_const (file, XEXP (x, 1), code);
13201 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13202 output_pic_addr_const (file, XEXP (x, 1), code);
13204 output_pic_addr_const (file, XEXP (x, 0), code);
13210 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13211 output_pic_addr_const (file, XEXP (x, 0), code);
13213 output_pic_addr_const (file, XEXP (x, 1), code);
13215 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13219 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13221 bool f = i386_asm_output_addr_const_extra (file, x);
13226 gcc_assert (XVECLEN (x, 0) == 1);
13227 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13228 switch (XINT (x, 1))
13231 fputs ("@GOT", file);
13233 case UNSPEC_GOTOFF:
13234 fputs ("@GOTOFF", file);
13236 case UNSPEC_PLTOFF:
13237 fputs ("@PLTOFF", file);
13240 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13241 "(%rip)" : "[rip]", file);
13243 case UNSPEC_GOTPCREL:
13244 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13245 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13247 case UNSPEC_GOTTPOFF:
13248 /* FIXME: This might be @TPOFF in Sun ld too. */
13249 fputs ("@gottpoff", file);
13252 fputs ("@tpoff", file);
13254 case UNSPEC_NTPOFF:
13256 fputs ("@tpoff", file);
13258 fputs ("@ntpoff", file);
13260 case UNSPEC_DTPOFF:
13261 fputs ("@dtpoff", file);
13263 case UNSPEC_GOTNTPOFF:
13265 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13266 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13268 fputs ("@gotntpoff", file);
13270 case UNSPEC_INDNTPOFF:
13271 fputs ("@indntpoff", file);
13274 case UNSPEC_MACHOPIC_OFFSET:
13276 machopic_output_function_base_name (file);
13280 output_operand_lossage ("invalid UNSPEC as operand");
13286 output_operand_lossage ("invalid expression as operand");
13290 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13291 We need to emit DTP-relative relocations. */
13293 static void ATTRIBUTE_UNUSED
13294 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13296 fputs (ASM_LONG, file);
13297 output_addr_const (file, x);
13298 fputs ("@dtpoff", file);
13304 fputs (", 0", file);
13307 gcc_unreachable ();
13311 /* Return true if X is a representation of the PIC register. This copes
13312 with calls from ix86_find_base_term, where the register might have
13313 been replaced by a cselib value. */
13316 ix86_pic_register_p (rtx x)
13318 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13319 return (pic_offset_table_rtx
13320 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13322 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13325 /* Helper function for ix86_delegitimize_address.
13326 Attempt to delegitimize TLS local-exec accesses. */
13329 ix86_delegitimize_tls_address (rtx orig_x)
13331 rtx x = orig_x, unspec;
13332 struct ix86_address addr;
13334 if (!TARGET_TLS_DIRECT_SEG_REFS)
13338 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13340 if (ix86_decompose_address (x, &addr) == 0
13341 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13342 || addr.disp == NULL_RTX
13343 || GET_CODE (addr.disp) != CONST)
13345 unspec = XEXP (addr.disp, 0);
13346 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13347 unspec = XEXP (unspec, 0);
13348 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13350 x = XVECEXP (unspec, 0, 0);
13351 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13352 if (unspec != XEXP (addr.disp, 0))
13353 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13356 rtx idx = addr.index;
13357 if (addr.scale != 1)
13358 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13359 x = gen_rtx_PLUS (Pmode, idx, x);
13362 x = gen_rtx_PLUS (Pmode, addr.base, x);
13363 if (MEM_P (orig_x))
13364 x = replace_equiv_address_nv (orig_x, x);
13368 /* In the name of slightly smaller debug output, and to cater to
13369 general assembler lossage, recognize PIC+GOTOFF and turn it back
13370 into a direct symbol reference.
13372 On Darwin, this is necessary to avoid a crash, because Darwin
13373 has a different PIC label for each routine but the DWARF debugging
13374 information is not associated with any particular routine, so it's
13375 necessary to remove references to the PIC label from RTL stored by
13376 the DWARF output code. */
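/* A sketch of the common 64-bit case (illustrative): the legitimized
   memory reference

     (mem (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL)))

   is turned back into plain (symbol_ref "foo"), so debug output and
   alias analysis see the symbol itself rather than the GOT slot.  */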
13379 ix86_delegitimize_address (rtx x)
13381 rtx orig_x = delegitimize_mem_from_attrs (x);
13382 /* addend is NULL or some rtx if x is something+GOTOFF where
13383 something doesn't include the PIC register. */
13384 rtx addend = NULL_RTX;
13385 /* reg_addend is NULL or a multiple of some register. */
13386 rtx reg_addend = NULL_RTX;
13387 /* const_addend is NULL or a const_int. */
13388 rtx const_addend = NULL_RTX;
13389 /* This is the result, or NULL. */
13390 rtx result = NULL_RTX;
13399 if (GET_CODE (x) == CONST
13400 && GET_CODE (XEXP (x, 0)) == PLUS
13401 && GET_MODE (XEXP (x, 0)) == Pmode
13402 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13403 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
13404 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
13406 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
13407 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
13408 if (MEM_P (orig_x))
13409 x = replace_equiv_address_nv (orig_x, x);
13412 if (GET_CODE (x) != CONST
13413 || GET_CODE (XEXP (x, 0)) != UNSPEC
13414 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13415 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13416 || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
13417 return ix86_delegitimize_tls_address (orig_x);
13418 x = XVECEXP (XEXP (x, 0), 0, 0);
13419 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
13421 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13429 if (GET_CODE (x) != PLUS
13430 || GET_CODE (XEXP (x, 1)) != CONST)
13431 return ix86_delegitimize_tls_address (orig_x);
13433 if (ix86_pic_register_p (XEXP (x, 0)))
13434 /* %ebx + GOT/GOTOFF */
13436 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13438 /* %ebx + %reg * scale + GOT/GOTOFF */
13439 reg_addend = XEXP (x, 0);
13440 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13441 reg_addend = XEXP (reg_addend, 1);
13442 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13443 reg_addend = XEXP (reg_addend, 0);
13446 reg_addend = NULL_RTX;
13447 addend = XEXP (x, 0);
13451 addend = XEXP (x, 0);
13453 x = XEXP (XEXP (x, 1), 0);
13454 if (GET_CODE (x) == PLUS
13455 && CONST_INT_P (XEXP (x, 1)))
13457 const_addend = XEXP (x, 1);
13461 if (GET_CODE (x) == UNSPEC
13462 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13463 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13464 result = XVECEXP (x, 0, 0);
13466 if (TARGET_MACHO && darwin_local_data_pic (x)
13467 && !MEM_P (orig_x))
13468 result = XVECEXP (x, 0, 0);
13471 return ix86_delegitimize_tls_address (orig_x);
13474 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13476 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13479 /* If the rest of original X doesn't involve the PIC register, add
13480 addend and subtract pic_offset_table_rtx.  This can happen e.g. for code like:
13482 leal (%ebx, %ecx, 4), %ecx
13484 movl foo@GOTOFF(%ecx), %edx
13485 in which case we return (%ecx - %ebx) + foo. */
13486 if (pic_offset_table_rtx)
13487 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13488 pic_offset_table_rtx),
13493 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13495 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13496 if (result == NULL_RTX)
13502 /* If X is a machine specific address (i.e. a symbol or label being
13503 referenced as a displacement from the GOT implemented using an
13504 UNSPEC), then return the base term. Otherwise return X. */
13507 ix86_find_base_term (rtx x)
13513 if (GET_CODE (x) != CONST)
13515 term = XEXP (x, 0);
13516 if (GET_CODE (term) == PLUS
13517 && (CONST_INT_P (XEXP (term, 1))
13518 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13519 term = XEXP (term, 0);
13520 if (GET_CODE (term) != UNSPEC
13521 || (XINT (term, 1) != UNSPEC_GOTPCREL
13522 && XINT (term, 1) != UNSPEC_PCREL))
13525 return XVECEXP (term, 0, 0);
13528 return ix86_delegitimize_address (x);
13532 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13533 int fp, FILE *file)
13535 const char *suffix;
13537 if (mode == CCFPmode || mode == CCFPUmode)
13539 code = ix86_fp_compare_code_to_integer (code);
13543 code = reverse_condition (code);
13594 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13598 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13599 Those same assemblers have the same but opposite lossage on cmov. */
13600 if (mode == CCmode)
13601 suffix = fp ? "nbe" : "a";
13602 else if (mode == CCCmode)
13605 gcc_unreachable ();
13621 gcc_unreachable ();
13625 gcc_assert (mode == CCmode || mode == CCCmode);
13642 gcc_unreachable ();
13646 /* ??? As above. */
13647 gcc_assert (mode == CCmode || mode == CCCmode);
13648 suffix = fp ? "nb" : "ae";
13651 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13655 /* ??? As above. */
13656 if (mode == CCmode)
13658 else if (mode == CCCmode)
13659 suffix = fp ? "nb" : "ae";
13661 gcc_unreachable ();
13664 suffix = fp ? "u" : "p";
13667 suffix = fp ? "nu" : "np";
13670 gcc_unreachable ();
13672 fputs (suffix, file);
13675 /* Print the name of register X to FILE based on its machine mode and number.
13676 If CODE is 'w', pretend the mode is HImode.
13677 If CODE is 'b', pretend the mode is QImode.
13678 If CODE is 'k', pretend the mode is SImode.
13679 If CODE is 'q', pretend the mode is DImode.
13680 If CODE is 'x', pretend the mode is V4SFmode.
13681 If CODE is 't', pretend the mode is V8SFmode.
13682 If CODE is 'h', pretend the reg is the 'high' byte register.
13683 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13684 If CODE is 'd', duplicate the operand for AVX instruction.
13688 print_reg (rtx x, int code, FILE *file)
13691 unsigned int regno;
13692 bool duplicated = code == 'd' && TARGET_AVX;
13694 if (ASSEMBLER_DIALECT == ASM_ATT)
13699 gcc_assert (TARGET_64BIT);
13700 fputs ("rip", file);
13704 regno = true_regnum (x);
13705 gcc_assert (regno != ARG_POINTER_REGNUM
13706 && regno != FRAME_POINTER_REGNUM
13707 && regno != FLAGS_REG
13708 && regno != FPSR_REG
13709 && regno != FPCR_REG);
13711 if (code == 'w' || MMX_REG_P (x))
13713 else if (code == 'b')
13715 else if (code == 'k')
13717 else if (code == 'q')
13719 else if (code == 'y')
13721 else if (code == 'h')
13723 else if (code == 'x')
13725 else if (code == 't')
13728 code = GET_MODE_SIZE (GET_MODE (x));
13730 /* Irritatingly, AMD extended registers use a different naming convention
13731 from the normal registers: "r%d[bwd]". */
13732 if (REX_INT_REGNO_P (regno))
13734 gcc_assert (TARGET_64BIT);
13736 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
13740 error ("extended registers have no high halves");
13755 error ("unsupported operand size for extended register");
13765 if (STACK_TOP_P (x))
13774 if (! ANY_FP_REG_P (x))
13775 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13780 reg = hi_reg_name[regno];
13783 if (regno >= ARRAY_SIZE (qi_reg_name))
13785 reg = qi_reg_name[regno];
13788 if (regno >= ARRAY_SIZE (qi_high_reg_name))
13790 reg = qi_high_reg_name[regno];
13795 gcc_assert (!duplicated);
13797 fputs (hi_reg_name[regno] + 1, file);
13802 gcc_unreachable ();
13808 if (ASSEMBLER_DIALECT == ASM_ATT)
13809 fprintf (file, ", %%%s", reg);
13811 fprintf (file, ", %s", reg);
13815 /* Locate some local-dynamic symbol still in use by this function
13816 so that we can print its name in some tls_local_dynamic_base
13820 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13824 if (GET_CODE (x) == SYMBOL_REF
13825 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13827 cfun->machine->some_ld_name = XSTR (x, 0);
13834 static const char *
13835 get_some_local_dynamic_name (void)
13839 if (cfun->machine->some_ld_name)
13840 return cfun->machine->some_ld_name;
13842 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13843 if (NONDEBUG_INSN_P (insn)
13844 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13845 return cfun->machine->some_ld_name;
13850 /* Meaning of CODE:
13851 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13852 C -- print opcode suffix for set/cmov insn.
13853 c -- like C, but print reversed condition
13854 F,f -- likewise, but for floating-point.
13855 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13857 R -- print the prefix for register names.
13858 z -- print the opcode suffix for the size of the current operand.
13859 Z -- likewise, with special suffixes for x87 instructions.
13860 * -- print a star (in certain assembler syntax)
13861 A -- print an absolute memory reference.
13862 E -- print address with DImode register names if TARGET_64BIT.
13863 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13864 s -- print a shift double count, followed by the assembler's argument delimiter.
13866 b -- print the QImode name of the register for the indicated operand.
13867 %b0 would print %al if operands[0] is reg 0.
13868 w -- likewise, print the HImode name of the register.
13869 k -- likewise, print the SImode name of the register.
13870 q -- likewise, print the DImode name of the register.
13871 x -- likewise, print the V4SFmode name of the register.
13872 t -- likewise, print the V8SFmode name of the register.
13873 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13874 y -- print "st(0)" instead of "st" as a register.
13875 d -- print duplicated register operand for AVX instruction.
13876 D -- print condition for SSE cmp instruction.
13877 P -- if PIC, print an @PLT suffix.
13878 p -- print raw symbol name.
13879 X -- don't print any sort of PIC '@' suffix for a symbol.
13880 & -- print some in-use local-dynamic symbol name.
13881 H -- print a memory address offset by 8; used for sse high-parts
13882 Y -- print condition for XOP pcom* instruction.
13883 + -- print a branch hint as 'cs' or 'ds' prefix
13884 ; -- print a semicolon (after prefixes due to bug in older gas).
13885 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13886 @ -- print a segment register of thread base pointer load
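   For example (illustrative): if operands[0] is register 0, %b0 prints
   %al, %w0 prints %ax, %k0 prints %eax and, in 64-bit mode, %q0 prints
   %rax.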
13890 ix86_print_operand (FILE *file, rtx x, int code)
13897 if (ASSEMBLER_DIALECT == ASM_ATT)
13903 const char *name = get_some_local_dynamic_name ();
13905 output_operand_lossage ("'%%&' used without any "
13906 "local dynamic TLS references");
13908 assemble_name (file, name);
13913 switch (ASSEMBLER_DIALECT)
13920 /* Intel syntax. For absolute addresses, registers should not
13921 be surrounded by braces. */
13925 ix86_print_operand (file, x, 0);
13932 gcc_unreachable ();
13935 ix86_print_operand (file, x, 0);
13939 /* Wrap address in an UNSPEC to declare special handling. */
13941 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
13943 output_address (x);
13947 if (ASSEMBLER_DIALECT == ASM_ATT)
13952 if (ASSEMBLER_DIALECT == ASM_ATT)
13957 if (ASSEMBLER_DIALECT == ASM_ATT)
13962 if (ASSEMBLER_DIALECT == ASM_ATT)
13967 if (ASSEMBLER_DIALECT == ASM_ATT)
13972 if (ASSEMBLER_DIALECT == ASM_ATT)
13977 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13979 /* Opcodes don't get size suffixes if using Intel opcodes. */
13980 if (ASSEMBLER_DIALECT == ASM_INTEL)
13983 switch (GET_MODE_SIZE (GET_MODE (x)))
14002 output_operand_lossage
14003 ("invalid operand size for operand code '%c'", code);
14008 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14010 warning (0, "non-integer operand used with operand code '%c'", code);
14014 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14015 if (ASSEMBLER_DIALECT == ASM_INTEL)
14018 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14020 switch (GET_MODE_SIZE (GET_MODE (x)))
14023 #ifdef HAVE_AS_IX86_FILDS
14033 #ifdef HAVE_AS_IX86_FILDQ
14036 fputs ("ll", file);
14044 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14046 /* 387 opcodes don't get size suffixes
14047 if the operands are registers. */
14048 if (STACK_REG_P (x))
14051 switch (GET_MODE_SIZE (GET_MODE (x)))
14072 output_operand_lossage
14073 ("invalid operand type used with operand code '%c'", code);
14077 output_operand_lossage
14078 ("invalid operand size for operand code '%c'", code);
14096 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14098 ix86_print_operand (file, x, 0);
14099 fputs (", ", file);
14104 /* A little bit of brain damage here: the SSE compare instructions
14105 use completely different names for the comparisons than the
14106 fp conditional moves do. */
14109 switch (GET_CODE (x))
14112 fputs ("eq", file);
14115 fputs ("eq_us", file);
14118 fputs ("lt", file);
14121 fputs ("nge", file);
14124 fputs ("le", file);
14127 fputs ("ngt", file);
14130 fputs ("unord", file);
14133 fputs ("neq", file);
14136 fputs ("neq_oq", file);
14139 fputs ("ge", file);
14142 fputs ("nlt", file);
14145 fputs ("gt", file);
14148 fputs ("nle", file);
14151 fputs ("ord", file);
14154 output_operand_lossage ("operand is not a condition code, "
14155 "invalid operand code 'D'");
14161 switch (GET_CODE (x))
14165 fputs ("eq", file);
14169 fputs ("lt", file);
14173 fputs ("le", file);
14176 fputs ("unord", file);
14180 fputs ("neq", file);
14184 fputs ("nlt", file);
14188 fputs ("nle", file);
14191 fputs ("ord", file);
14194 output_operand_lossage ("operand is not a condition code, "
14195 "invalid operand code 'D'");
14201 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14202 if (ASSEMBLER_DIALECT == ASM_ATT)
14204 switch (GET_MODE (x))
14206 case HImode: putc ('w', file); break;
14208 case SFmode: putc ('l', file); break;
14210 case DFmode: putc ('q', file); break;
14211 default: gcc_unreachable ();
14218 if (!COMPARISON_P (x))
14220 output_operand_lossage ("operand is neither a constant nor a "
14221 "condition code, invalid operand code "
14225 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14228 if (!COMPARISON_P (x))
14230 output_operand_lossage ("operand is neither a constant nor a "
14231 "condition code, invalid operand code "
14235 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14236 if (ASSEMBLER_DIALECT == ASM_ATT)
14239 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14242 /* Like above, but with the condition reversed. */
14244 /* Check to see if argument to %c is really a constant
14245 and not a condition code which needs to be reversed. */
14246 if (!COMPARISON_P (x))
14248 output_operand_lossage ("operand is neither a constant nor a "
14249 "condition code, invalid operand "
14253 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14256 if (!COMPARISON_P (x))
14258 output_operand_lossage ("operand is neither a constant nor a "
14259 "condition code, invalid operand "
14263 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14264 if (ASSEMBLER_DIALECT == ASM_ATT)
14267 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14271 if (!offsettable_memref_p (x))
14273 output_operand_lossage ("operand is not an offsettable memory "
14274 "reference, invalid operand "
14278 /* It doesn't actually matter what mode we use here, as we're
14279 only going to use this for printing. */
14280 x = adjust_address_nv (x, DImode, 8);
14288 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14291 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14294 int pred_val = INTVAL (XEXP (x, 0));
14296 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14297 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14299 int taken = pred_val > REG_BR_PROB_BASE / 2;
14300 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14302 /* Emit hints only when the default branch prediction
14303 heuristics would fail. */
14304 if (taken != cputaken)
14306 /* We use 3e (DS) prefix for taken branches and
14307 2e (CS) prefix for not taken branches. */
14309 fputs ("ds ; ", file);
14311 fputs ("cs ; ", file);
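		  /* Illustrative output (label name hypothetical):
		     "ds ; jne .L2" hints taken, "cs ; jne .L2" hints
		     not taken.  */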
14319 switch (GET_CODE (x))
14322 fputs ("neq", file);
14325 fputs ("eq", file);
14329 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14333 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14337 fputs ("le", file);
14341 fputs ("lt", file);
14344 fputs ("unord", file);
14347 fputs ("ord", file);
14350 fputs ("ueq", file);
14353 fputs ("nlt", file);
14356 fputs ("nle", file);
14359 fputs ("ule", file);
14362 fputs ("ult", file);
14365 fputs ("une", file);
14368 output_operand_lossage ("operand is not a condition code, "
14369 "invalid operand code 'Y'");
14375 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14381 if (ASSEMBLER_DIALECT == ASM_ATT)
14384 /* The kernel uses a different segment register for performance
14385 reasons; this way a system call does not have to trash the
14386 userspace segment register, which would be expensive. */
14387 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14388 fputs ("fs", file);
14390 fputs ("gs", file);
14394 putc (TARGET_AVX2 ? 'i' : 'f', file);
14398 output_operand_lossage ("invalid operand code '%c'", code);
14403 print_reg (x, code, file);
14405 else if (MEM_P (x))
14407 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14408 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14409 && GET_MODE (x) != BLKmode)
14412 switch (GET_MODE_SIZE (GET_MODE (x)))
14414 case 1: size = "BYTE"; break;
14415 case 2: size = "WORD"; break;
14416 case 4: size = "DWORD"; break;
14417 case 8: size = "QWORD"; break;
14418 case 12: size = "TBYTE"; break;
14420 if (GET_MODE (x) == XFmode)
14425 case 32: size = "YMMWORD"; break;
14427 gcc_unreachable ();
14430 /* Check for explicit size override (codes 'b', 'w', 'k',
14434 else if (code == 'w')
14436 else if (code == 'k')
14438 else if (code == 'q')
14440 else if (code == 'x')
14443 fputs (size, file);
14444 fputs (" PTR ", file);
14448 /* Avoid (%rip) for call operands. */
14449 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14450 && !CONST_INT_P (x))
14451 output_addr_const (file, x);
14452 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14453 output_operand_lossage ("invalid constraints for operand");
14455 output_address (x);
14458 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14463 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14464 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14466 if (ASSEMBLER_DIALECT == ASM_ATT)
14468 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14470 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14472 fprintf (file, "0x%08x", (unsigned int) l);
14475 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14480 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14481 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14483 if (ASSEMBLER_DIALECT == ASM_ATT)
14485 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14488 /* These float cases don't actually occur as immediate operands. */
14489 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14493 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14494 fputs (dstr, file);
14499 /* We have patterns that allow zero sets of memory, for instance.
14500 In 64-bit mode, we should probably support all 8-byte vectors,
14501 since we can in fact encode that into an immediate. */
14502 if (GET_CODE (x) == CONST_VECTOR)
14504 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14508 if (code != 'P' && code != 'p')
14510 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14512 if (ASSEMBLER_DIALECT == ASM_ATT)
14515 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14516 || GET_CODE (x) == LABEL_REF)
14518 if (ASSEMBLER_DIALECT == ASM_ATT)
14521 fputs ("OFFSET FLAT:", file);
14524 if (CONST_INT_P (x))
14525 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14526 else if (flag_pic || MACHOPIC_INDIRECT)
14527 output_pic_addr_const (file, x, code);
14529 output_addr_const (file, x);
14534 ix86_print_operand_punct_valid_p (unsigned char code)
14536 return (code == '@' || code == '*' || code == '+'
14537 || code == '&' || code == ';' || code == '~');
14540 /* Print a memory operand whose address is ADDR. */
14543 ix86_print_operand_address (FILE *file, rtx addr)
14545 struct ix86_address parts;
14546 rtx base, index, disp;
14552 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14554 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14555 gcc_assert (parts.index == NULL_RTX);
14556 parts.index = XVECEXP (addr, 0, 1);
14557 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14558 addr = XVECEXP (addr, 0, 0);
14561 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
14563 gcc_assert (TARGET_64BIT);
14564 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14568 ok = ix86_decompose_address (addr, &parts);
14573 index = parts.index;
14575 scale = parts.scale;
14583 if (ASSEMBLER_DIALECT == ASM_ATT)
14585 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14588 gcc_unreachable ();
14591 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14592 if (TARGET_64BIT && !base && !index)
14596 if (GET_CODE (disp) == CONST
14597 && GET_CODE (XEXP (disp, 0)) == PLUS
14598 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14599 symbol = XEXP (XEXP (disp, 0), 0);
14601 if (GET_CODE (symbol) == LABEL_REF
14602 || (GET_CODE (symbol) == SYMBOL_REF
14603 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14606 if (!base && !index)
14608 /* Displacement only requires special attention. */
14610 if (CONST_INT_P (disp))
14612 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14613 fputs ("ds:", file);
14614 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14617 output_pic_addr_const (file, disp, 0);
14619 output_addr_const (file, disp);
14623 /* Print SImode register names to force addr32 prefix. */
14624 if (SImode_address_operand (addr, VOIDmode))
14626 #ifdef ENABLE_CHECKING
14627 gcc_assert (TARGET_64BIT);
14628 switch (GET_CODE (addr))
14631 gcc_assert (GET_MODE (addr) == SImode);
14632 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
14636 gcc_assert (GET_MODE (addr) == DImode);
14639 gcc_unreachable ();
14642 gcc_assert (!code);
14646 if (ASSEMBLER_DIALECT == ASM_ATT)
14651 output_pic_addr_const (file, disp, 0);
14652 else if (GET_CODE (disp) == LABEL_REF)
14653 output_asm_label (disp);
14655 output_addr_const (file, disp);
14660 print_reg (base, code, file);
14664 print_reg (index, vsib ? 0 : code, file);
14665 if (scale != 1 || vsib)
14666 fprintf (file, ",%d", scale);
14672 rtx offset = NULL_RTX;
14676 /* Pull out the offset of a symbol; print any symbol itself. */
14677 if (GET_CODE (disp) == CONST
14678 && GET_CODE (XEXP (disp, 0)) == PLUS
14679 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14681 offset = XEXP (XEXP (disp, 0), 1);
14682 disp = gen_rtx_CONST (VOIDmode,
14683 XEXP (XEXP (disp, 0), 0));
14687 output_pic_addr_const (file, disp, 0);
14688 else if (GET_CODE (disp) == LABEL_REF)
14689 output_asm_label (disp);
14690 else if (CONST_INT_P (disp))
14693 output_addr_const (file, disp);
14699 print_reg (base, code, file);
14702 if (INTVAL (offset) >= 0)
14704 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14708 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14715 print_reg (index, vsib ? 0 : code, file);
14716 if (scale != 1 || vsib)
14717 fprintf (file, "*%d", scale);
14724 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14727 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14731 if (GET_CODE (x) != UNSPEC)
14734 op = XVECEXP (x, 0, 0);
14735 switch (XINT (x, 1))
14737 case UNSPEC_GOTTPOFF:
14738 output_addr_const (file, op);
14739 /* FIXME: This might be @TPOFF in Sun ld. */
14740 fputs ("@gottpoff", file);
14743 output_addr_const (file, op);
14744 fputs ("@tpoff", file);
14746 case UNSPEC_NTPOFF:
14747 output_addr_const (file, op);
14749 fputs ("@tpoff", file);
14751 fputs ("@ntpoff", file);
14753 case UNSPEC_DTPOFF:
14754 output_addr_const (file, op);
14755 fputs ("@dtpoff", file);
14757 case UNSPEC_GOTNTPOFF:
14758 output_addr_const (file, op);
14760 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14761 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14763 fputs ("@gotntpoff", file);
14765 case UNSPEC_INDNTPOFF:
14766 output_addr_const (file, op);
14767 fputs ("@indntpoff", file);
14770 case UNSPEC_MACHOPIC_OFFSET:
14771 output_addr_const (file, op);
14773 machopic_output_function_base_name (file);
14777 case UNSPEC_STACK_CHECK:
14781 gcc_assert (flag_split_stack);
14783 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14784 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14786 gcc_unreachable ();
14789 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14800 /* Split one or more double-mode RTL references into pairs of half-mode
14801 references. The RTL can be REG, offsettable MEM, integer constant, or
14802 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14803 split and "num" is its length. lo_half and hi_half are output arrays
14804 that parallel "operands". */
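/* Illustrative example (little-endian halves assumed): splitting a
   DImode register on a 32-bit target yields

     lo_half = (subreg:SI (reg:DI x) 0)
     hi_half = (subreg:SI (reg:DI x) 4)

   while an offsettable MEM splits into the MEM itself and the same
   MEM adjusted by 4 bytes.  */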
14807 split_double_mode (enum machine_mode mode, rtx operands[],
14808 int num, rtx lo_half[], rtx hi_half[])
14810 enum machine_mode half_mode;
14816 half_mode = DImode;
14819 half_mode = SImode;
14822 gcc_unreachable ();
14825 byte = GET_MODE_SIZE (half_mode);
14829 rtx op = operands[num];
14831 /* simplify_subreg refuses to split volatile memory references,
14832 but we still have to handle them. */
14835 lo_half[num] = adjust_address (op, half_mode, 0);
14836 hi_half[num] = adjust_address (op, half_mode, byte);
14840 lo_half[num] = simplify_gen_subreg (half_mode, op,
14841 GET_MODE (op) == VOIDmode
14842 ? mode : GET_MODE (op), 0);
14843 hi_half[num] = simplify_gen_subreg (half_mode, op,
14844 GET_MODE (op) == VOIDmode
14845 ? mode : GET_MODE (op), byte);
14850 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14851 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14852 is the expression of the binary operation. The output may either be
14853 emitted here, or returned to the caller, like all output_* functions.
14855 There is no guarantee that the operands are the same mode, as they
14856 might be within FLOAT or FLOAT_EXTEND expressions. */
14858 #ifndef SYSV386_COMPAT
14859 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14860 wants to fix the assemblers because that causes incompatibility
14861 with gcc. No-one wants to fix gcc because that causes
14862 incompatibility with assemblers... You can use the option of
14863 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14864 #define SYSV386_COMPAT 1
14868 output_387_binary_op (rtx insn, rtx *operands)
14870 static char buf[40];
14873 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14875 #ifdef ENABLE_CHECKING
14876 /* Even if we do not want to check the inputs, this documents the input
14877 constraints, which helps in understanding the following code. */
14878 if (STACK_REG_P (operands[0])
14879 && ((REG_P (operands[1])
14880 && REGNO (operands[0]) == REGNO (operands[1])
14881 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14882 || (REG_P (operands[2])
14883 && REGNO (operands[0]) == REGNO (operands[2])
14884 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14885 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14888 gcc_assert (is_sse);
14891 switch (GET_CODE (operands[3]))
14894 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14895 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14903 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14904 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14912 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14913 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14921 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14922 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14930 gcc_unreachable ();
14937 strcpy (buf, ssep);
14938 if (GET_MODE (operands[0]) == SFmode)
14939 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14941 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14945 strcpy (buf, ssep + 1);
14946 if (GET_MODE (operands[0]) == SFmode)
14947 strcat (buf, "ss\t{%2, %0|%0, %2}");
14949 strcat (buf, "sd\t{%2, %0|%0, %2}");
14955 switch (GET_CODE (operands[3]))
14959 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14961 rtx temp = operands[2];
14962 operands[2] = operands[1];
14963 operands[1] = temp;
14966 /* Now we know operands[0] == operands[1]. */
14968 if (MEM_P (operands[2]))
14974 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14976 if (STACK_TOP_P (operands[0]))
14977 /* How is it that we are storing to a dead operand[2]?
14978 Well, presumably operands[1] is dead too. We can't
14979 store the result to st(0) as st(0) gets popped on this
14980 instruction. Instead store to operands[2] (which I
14981 think has to be st(1)). st(1) will be popped later.
14982 gcc <= 2.8.1 didn't have this check and generated
14983 assembly code that the Unixware assembler rejected. */
14984 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14986 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14990 if (STACK_TOP_P (operands[0]))
14991 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14993 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14998 if (MEM_P (operands[1]))
15004 if (MEM_P (operands[2]))
15010 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15013 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15014 derived assemblers, confusingly reverse the direction of
15015 the operation for fsub{r} and fdiv{r} when the
15016 destination register is not st(0). The Intel assembler
15017 doesn't have this brain damage. Read !SYSV386_COMPAT to
15018 figure out what the hardware really does. */
15019 if (STACK_TOP_P (operands[0]))
15020 p = "{p\t%0, %2|rp\t%2, %0}";
15022 p = "{rp\t%2, %0|p\t%0, %2}";
15024 if (STACK_TOP_P (operands[0]))
15025 /* As above for fmul/fadd, we can't store to st(0). */
15026 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15028 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15033 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15036 if (STACK_TOP_P (operands[0]))
15037 p = "{rp\t%0, %1|p\t%1, %0}";
15039 p = "{p\t%1, %0|rp\t%0, %1}";
15041 if (STACK_TOP_P (operands[0]))
15042 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15044 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15049 if (STACK_TOP_P (operands[0]))
15051 if (STACK_TOP_P (operands[1]))
15052 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15054 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15057 else if (STACK_TOP_P (operands[1]))
15060 p = "{\t%1, %0|r\t%0, %1}";
15062 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15068 p = "{r\t%2, %0|\t%0, %2}";
15070 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15076 gcc_unreachable ();
15083 /* Return needed mode for entity in optimize_mode_switching pass. */
15086 ix86_mode_needed (int entity, rtx insn)
15088 enum attr_i387_cw mode;
15090 /* The mode UNINITIALIZED is used to store the control word after a
15091 function call or ASM pattern. The mode ANY specifies that the function
15092 has no requirements on the control word and makes no changes in the
15093 bits we are interested in. */
15096 || (NONJUMP_INSN_P (insn)
15097 && (asm_noperands (PATTERN (insn)) >= 0
15098 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15099 return I387_CW_UNINITIALIZED;
15101 if (recog_memoized (insn) < 0)
15102 return I387_CW_ANY;
15104 mode = get_attr_i387_cw (insn);
15109 if (mode == I387_CW_TRUNC)
15114 if (mode == I387_CW_FLOOR)
15119 if (mode == I387_CW_CEIL)
15124 if (mode == I387_CW_MASK_PM)
15129 gcc_unreachable ();
15132 return I387_CW_ANY;
15135 /* Output code to initialize control word copies used by trunc?f?i and
15136 rounding patterns. CURRENT_MODE is set to the current control word,
15137 while NEW_MODE is set to the new control word. */
15140 emit_i387_cw_initialization (int mode)
15142 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15145 enum ix86_stack_slot slot;
15147 rtx reg = gen_reg_rtx (HImode);
15149 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15150 emit_move_insn (reg, copy_rtx (stored_mode));
15152 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15153 || optimize_function_for_size_p (cfun))
15157 case I387_CW_TRUNC:
15158 /* round toward zero (truncate) */
15159 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15160 slot = SLOT_CW_TRUNC;
15163 case I387_CW_FLOOR:
15164 /* round down toward -oo */
15165 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15166 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15167 slot = SLOT_CW_FLOOR;
15171 /* round up toward +oo */
15172 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15173 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15174 slot = SLOT_CW_CEIL;
15177 case I387_CW_MASK_PM:
15178 /* mask precision exception for nearbyint() */
15179 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15180 slot = SLOT_CW_MASK_PM;
15184 gcc_unreachable ();
15191 case I387_CW_TRUNC:
15192 /* round toward zero (truncate) */
15193 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15194 slot = SLOT_CW_TRUNC;
15197 case I387_CW_FLOOR:
15198 /* round down toward -oo */
15199 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15200 slot = SLOT_CW_FLOOR;
15204 /* round up toward +oo */
15205 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15206 slot = SLOT_CW_CEIL;
15209 case I387_CW_MASK_PM:
15210 /* mask precision exception for nearbyint() */
15211 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15212 slot = SLOT_CW_MASK_PM;
15216 gcc_unreachable ();
15220 gcc_assert (slot < MAX_386_STACK_LOCALS);
15222 new_mode = assign_386_stack_local (HImode, slot);
15223 emit_move_insn (new_mode, reg);
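/* Editorial sketch (not original source): the masks used above follow
   from the x87 control-word layout, where bits 10-11 form the
   rounding-control (RC) field -- 00 = nearest, 01 = down, 10 = up,
   11 = truncate -- and bit 5 (0x0020) masks the precision exception.
   A minimal standalone C model of the same bit twiddling:  */
#if 0
static unsigned short
x87_cw_with_rounding (unsigned short cw, int rc)
{
  cw &= ~0x0c00;                      /* clear the RC field (bits 10-11) */
  cw |= (unsigned short) (rc << 10);  /* install the new rounding mode */
  return cw;
}
#endif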
15226 /* Output code for INSN to convert a float to a signed int. OPERANDS
15227 are the insn operands. The output may be [HSD]Imode and the input
15228 operand may be [SDX]Fmode. */
15231 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
15233 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15234 int dimode_p = GET_MODE (operands[0]) == DImode;
15235 int round_mode = get_attr_i387_cw (insn);
15237 /* Jump through a hoop or two for DImode, since the hardware has no
15238 non-popping instruction. We used to do this a different way, but
15239 that was somewhat fragile and broke with post-reload splitters. */
15240 if ((dimode_p || fisttp) && !stack_top_dies)
15241 output_asm_insn ("fld\t%y1", operands);
15243 gcc_assert (STACK_TOP_P (operands[1]));
15244 gcc_assert (MEM_P (operands[0]));
15245 gcc_assert (GET_MODE (operands[1]) != TFmode);
15248 output_asm_insn ("fisttp%Z0\t%0", operands);
15251 if (round_mode != I387_CW_ANY)
15252 output_asm_insn ("fldcw\t%3", operands);
15253 if (stack_top_dies || dimode_p)
15254 output_asm_insn ("fistp%Z0\t%0", operands);
15256 output_asm_insn ("fist%Z0\t%0", operands);
15257 if (round_mode != I387_CW_ANY)
15258 output_asm_insn ("fldcw\t%2", operands);
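/* Editorial note: for the non-fisttp path the emitted sequence
   brackets the store with two control-word switches, e.g.

       fldcw   %3          # load the rounding-mode control word
       fistp%Z0 %0         # store (and pop)
       fldcw   %2          # restore the previous control word
*/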
15264 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15265 have the values zero or one, indicates the ffreep insn's operand
15266 from the OPERANDS array. */
15268 static const char *
15269 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15271 if (TARGET_USE_FFREEP)
15272 #ifdef HAVE_AS_IX86_FFREEP
15273 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15276 static char retval[32];
15277 int regno = REGNO (operands[opno]);
15279 gcc_assert (FP_REGNO_P (regno));
15281 regno -= FIRST_STACK_REG;
15283 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15288 return opno ? "fstp\t%y1" : "fstp\t%y0";
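/* Worked example (editorial note): "ffreep %st(i)" encodes as the two
   bytes 0xdf 0xc0+i, so the snprintf above produces e.g. ".short
   0xc2df" for regno 2; emitted little-endian, that 16-bit word lays
   down the bytes df c2, exactly the ffreep opcode.  */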
15292 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15293 should be used. UNORDERED_P is true when fucom should be used. */
15296 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
15298 int stack_top_dies;
15299 rtx cmp_op0, cmp_op1;
15300 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15304 cmp_op0 = operands[0];
15305 cmp_op1 = operands[1];
15309 cmp_op0 = operands[1];
15310 cmp_op1 = operands[2];
15315 if (GET_MODE (operands[0]) == SFmode)
15317 return "%vucomiss\t{%1, %0|%0, %1}";
15319 return "%vcomiss\t{%1, %0|%0, %1}";
15322 return "%vucomisd\t{%1, %0|%0, %1}";
15324 return "%vcomisd\t{%1, %0|%0, %1}";
15327 gcc_assert (STACK_TOP_P (cmp_op0));
15329 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15331 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15333 if (stack_top_dies)
15335 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15336 return output_387_ffreep (operands, 1);
15339 return "ftst\n\tfnstsw\t%0";
15342 if (STACK_REG_P (cmp_op1)
15344 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15345 && REGNO (cmp_op1) != FIRST_STACK_REG)
15347 /* If the top of the 387 stack dies, and the other operand
15348 is also a stack register that dies, then this must be a
15349 `fcompp' float compare
15353 /* There is no double popping fcomi variant. Fortunately,
15354 eflags is immune from the fstp's cc clobbering. */
15356 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15358 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15359 return output_387_ffreep (operands, 0);
15364 return "fucompp\n\tfnstsw\t%0";
15366 return "fcompp\n\tfnstsw\t%0";
15371 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15373 static const char * const alt[16] =
15375 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15376 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15377 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15378 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15380 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15381 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15385 "fcomi\t{%y1, %0|%0, %y1}",
15386 "fcomip\t{%y1, %0|%0, %y1}",
15387 "fucomi\t{%y1, %0|%0, %y1}",
15388 "fucomip\t{%y1, %0|%0, %y1}",
15399 mask = eflags_p << 3;
15400 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15401 mask |= unordered_p << 1;
15402 mask |= stack_top_dies;
15404 gcc_assert (mask < 16);
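/* Worked example (editorial note): an fcomi-style unordered compare of
   two FP stack registers where the top of stack dies gives
   mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */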
15413 ix86_output_addr_vec_elt (FILE *file, int value)
15415 const char *directive = ASM_LONG;
15419 directive = ASM_QUAD;
15421 gcc_assert (!TARGET_64BIT);
15424 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15428 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15430 const char *directive = ASM_LONG;
15433 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15434 directive = ASM_QUAD;
15436 gcc_assert (!TARGET_64BIT);
15438 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15439 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15440 fprintf (file, "%s%s%d-%s%d\n",
15441 directive, LPREFIX, value, LPREFIX, rel);
15442 else if (HAVE_AS_GOTOFF_IN_DATA)
15443 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15445 else if (TARGET_MACHO)
15447 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15448 machopic_output_function_base_name (file);
15453 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15454 GOT_SYMBOL_NAME, LPREFIX, value);
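/* Editorial note: for a 32-bit PIC jump table the routine above emits
   entries such as ".long .L4@GOTOFF" when the assembler supports
   @GOTOFF in data, while the 64-bit / VxWorks path emits plain label
   differences such as ".long .L4-.L2" (or ASM_QUAD for a DImode case
   vector).  */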
15457 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15461 ix86_expand_clear (rtx dest)
15465 /* We play register width games, which are only valid after reload. */
15466 gcc_assert (reload_completed);
15468 /* Avoid HImode and its attendant prefix byte. */
15469 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15470 dest = gen_rtx_REG (SImode, REGNO (dest));
15471 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15473 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15474 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
15476 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15477 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
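/* Editorial note: "xor reg, reg" is two bytes against five for
   "mov $0, reg" but, unlike mov, it clobbers EFLAGS; the PARALLEL
   with an explicit (clobber (reg:CC FLAGS_REG)) built above tells
   the rest of the compiler about that side effect.  */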
15483 /* X is an unchanging MEM. If it is a constant pool reference, return
15484 the constant pool rtx, else NULL. */
15487 maybe_get_pool_constant (rtx x)
15489 x = ix86_delegitimize_address (XEXP (x, 0));
15491 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15492 return get_pool_constant (x);
15498 ix86_expand_move (enum machine_mode mode, rtx operands[])
15501 enum tls_model model;
15506 if (GET_CODE (op1) == SYMBOL_REF)
15508 model = SYMBOL_REF_TLS_MODEL (op1);
15511 op1 = legitimize_tls_address (op1, model, true);
15512 op1 = force_operand (op1, op0);
15515 if (GET_MODE (op1) != mode)
15516 op1 = convert_to_mode (mode, op1, 1);
15518 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15519 && SYMBOL_REF_DLLIMPORT_P (op1))
15520 op1 = legitimize_dllimport_symbol (op1, false);
15522 else if (GET_CODE (op1) == CONST
15523 && GET_CODE (XEXP (op1, 0)) == PLUS
15524 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15526 rtx addend = XEXP (XEXP (op1, 0), 1);
15527 rtx symbol = XEXP (XEXP (op1, 0), 0);
15530 model = SYMBOL_REF_TLS_MODEL (symbol);
15532 tmp = legitimize_tls_address (symbol, model, true);
15533 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15534 && SYMBOL_REF_DLLIMPORT_P (symbol))
15535 tmp = legitimize_dllimport_symbol (symbol, true);
15539 tmp = force_operand (tmp, NULL);
15540 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15541 op0, 1, OPTAB_DIRECT);
15544 if (GET_MODE (tmp) != mode)
15545 op1 = convert_to_mode (mode, tmp, 1);
15549 if ((flag_pic || MACHOPIC_INDIRECT)
15550 && symbolic_operand (op1, mode))
15552 if (TARGET_MACHO && !TARGET_64BIT)
15555 /* dynamic-no-pic */
15556 if (MACHOPIC_INDIRECT)
15558 rtx temp = ((reload_in_progress
15559 || ((op0 && REG_P (op0))
15561 ? op0 : gen_reg_rtx (Pmode));
15562 op1 = machopic_indirect_data_reference (op1, temp);
15564 op1 = machopic_legitimize_pic_address (op1, mode,
15565 temp == op1 ? 0 : temp);
15567 if (op0 != op1 && GET_CODE (op0) != MEM)
15569 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15573 if (GET_CODE (op0) == MEM)
15574 op1 = force_reg (Pmode, op1);
15578 if (GET_CODE (temp) != REG)
15579 temp = gen_reg_rtx (Pmode);
15580 temp = legitimize_pic_address (op1, temp);
15585 /* dynamic-no-pic */
15591 op1 = force_reg (mode, op1);
15592 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15594 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15595 op1 = legitimize_pic_address (op1, reg);
15598 if (GET_MODE (op1) != mode)
15599 op1 = convert_to_mode (mode, op1, 1);
15606 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15607 || !push_operand (op0, mode))
15609 op1 = force_reg (mode, op1);
15611 if (push_operand (op0, mode)
15612 && ! general_no_elim_operand (op1, mode))
15613 op1 = copy_to_mode_reg (mode, op1);
15615 /* Force large constants in 64bit compilation into a register
15616 to get them CSEed. */
15617 if (can_create_pseudo_p ()
15618 && (mode == DImode) && TARGET_64BIT
15619 && immediate_operand (op1, mode)
15620 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15621 && !register_operand (op0, mode)
15623 op1 = copy_to_mode_reg (mode, op1);
15625 if (can_create_pseudo_p ()
15626 && FLOAT_MODE_P (mode)
15627 && GET_CODE (op1) == CONST_DOUBLE)
15629 /* If we are loading a floating point constant to a register,
15630 force the value to memory now, since we'll get better code
15631 out of the back end. */
15633 op1 = validize_mem (force_const_mem (mode, op1));
15634 if (!register_operand (op0, mode))
15636 rtx temp = gen_reg_rtx (mode);
15637 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15638 emit_move_insn (op0, temp);
15644 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15648 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15650 rtx op0 = operands[0], op1 = operands[1];
15651 unsigned int align = GET_MODE_ALIGNMENT (mode);
15653 /* Force constants other than zero into memory. We do not know how
15654 the instructions used to build constants modify the upper 64 bits
15655 of the register; once we have that information we may be able
15656 to handle some of them more efficiently. */
15657 if (can_create_pseudo_p ()
15658 && register_operand (op0, mode)
15659 && (CONSTANT_P (op1)
15660 || (GET_CODE (op1) == SUBREG
15661 && CONSTANT_P (SUBREG_REG (op1))))
15662 && !standard_sse_constant_p (op1))
15663 op1 = validize_mem (force_const_mem (mode, op1));
15665 /* We need to check memory alignment for SSE mode since the attribute
15666 can make operands unaligned. */
15667 if (can_create_pseudo_p ()
15668 && SSE_REG_MODE_P (mode)
15669 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15670 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15674 /* ix86_expand_vector_move_misalign() does not like constants ... */
15675 if (CONSTANT_P (op1)
15676 || (GET_CODE (op1) == SUBREG
15677 && CONSTANT_P (SUBREG_REG (op1))))
15678 op1 = validize_mem (force_const_mem (mode, op1));
15680 /* ... nor both arguments in memory. */
15681 if (!register_operand (op0, mode)
15682 && !register_operand (op1, mode))
15683 op1 = force_reg (mode, op1);
15685 tmp[0] = op0; tmp[1] = op1;
15686 ix86_expand_vector_move_misalign (mode, tmp);
15690 /* Make operand1 a register if it isn't already. */
15691 if (can_create_pseudo_p ()
15692 && !register_operand (op0, mode)
15693 && !register_operand (op1, mode))
15695 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15699 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15702 /* Split 32-byte AVX unaligned load and store if needed. */
15705 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15708 rtx (*extract) (rtx, rtx, rtx);
15709 rtx (*load_unaligned) (rtx, rtx);
15710 rtx (*store_unaligned) (rtx, rtx);
15711 enum machine_mode mode;
15713 switch (GET_MODE (op0))
15716 gcc_unreachable ();
15718 extract = gen_avx_vextractf128v32qi;
15719 load_unaligned = gen_avx_loaddqu256;
15720 store_unaligned = gen_avx_storedqu256;
15724 extract = gen_avx_vextractf128v8sf;
15725 load_unaligned = gen_avx_loadups256;
15726 store_unaligned = gen_avx_storeups256;
15730 extract = gen_avx_vextractf128v4df;
15731 load_unaligned = gen_avx_loadupd256;
15732 store_unaligned = gen_avx_storeupd256;
15739 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15741 rtx r = gen_reg_rtx (mode);
15742 m = adjust_address (op1, mode, 0);
15743 emit_move_insn (r, m);
15744 m = adjust_address (op1, mode, 16);
15745 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15746 emit_move_insn (op0, r);
15749 emit_insn (load_unaligned (op0, op1));
15751 else if (MEM_P (op0))
15753 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15755 m = adjust_address (op0, mode, 0);
15756 emit_insn (extract (m, op1, const0_rtx));
15757 m = adjust_address (op0, mode, 16);
15758 emit_insn (extract (m, op1, const1_rtx));
15761 emit_insn (store_unaligned (op0, op1));
15764 gcc_unreachable ();
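/* Illustrative sketch (editorial; registers chosen for the example):
   with -mavx256-split-unaligned-load / -mavx256-split-unaligned-store
   the 32-byte access is emitted roughly as

       load:   vmovups      (%rax), %xmm0
               vinsertf128  $1, 16(%rax), %ymm0, %ymm0
       store:  vmovups      %xmm0, (%rax)
               vextractf128 $1, %ymm0, 16(%rax)

   i.e. two 16-byte halves instead of one unaligned 32-byte move.  */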
15767 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15768 straight to ix86_expand_vector_move. */
15769 /* Code generation for scalar reg-reg moves of single and double precision data:
15770 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15774 if (x86_sse_partial_reg_dependency == true)
15779 Code generation for scalar loads of double precision data:
15780 if (x86_sse_split_regs == true)
15781 movlpd mem, reg (gas syntax)
15785 Code generation for unaligned packed loads of single precision data
15786 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15787 if (x86_sse_unaligned_move_optimal)
15790 if (x86_sse_partial_reg_dependency == true)
15802 Code generation for unaligned packed loads of double precision data
15803 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15804 if (x86_sse_unaligned_move_optimal)
15807 if (x86_sse_split_regs == true)
15820 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15823 rtx (*move_unaligned) (rtx, rtx);
15830 switch (GET_MODE_CLASS (mode))
15832 case MODE_VECTOR_INT:
15834 switch (GET_MODE_SIZE (mode))
15837 /* If we're optimizing for size, movups is the smallest. */
15838 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15841 move_unaligned = gen_sse_loadups;
15842 else if (MEM_P (op0))
15843 move_unaligned = gen_sse_storeups;
15845 gcc_unreachable ();
15847 op0 = gen_lowpart (V4SFmode, op0);
15848 op1 = gen_lowpart (V4SFmode, op1);
15849 emit_insn (move_unaligned (op0, op1));
15853 move_unaligned = gen_sse2_loaddqu;
15854 else if (MEM_P (op0))
15855 move_unaligned = gen_sse2_storedqu;
15857 gcc_unreachable ();
15859 op0 = gen_lowpart (V16QImode, op0);
15860 op1 = gen_lowpart (V16QImode, op1);
15861 emit_insn (move_unaligned (op0, op1));
15864 op0 = gen_lowpart (V32QImode, op0);
15865 op1 = gen_lowpart (V32QImode, op1);
15866 ix86_avx256_split_vector_move_misalign (op0, op1);
15869 gcc_unreachable ();
15872 case MODE_VECTOR_FLOAT:
15873 op0 = gen_lowpart (mode, op0);
15874 op1 = gen_lowpart (mode, op1);
15880 move_unaligned = gen_sse_loadups;
15881 else if (MEM_P (op0))
15882 move_unaligned = gen_sse_storeups;
15884 gcc_unreachable ();
15886 emit_insn (move_unaligned (op0, op1));
15889 ix86_avx256_split_vector_move_misalign (op0, op1);
15892 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15895 move_unaligned = gen_sse_loadups;
15896 else if (MEM_P (op0))
15897 move_unaligned = gen_sse_storeups;
15899 gcc_unreachable ();
15901 op0 = gen_lowpart (V4SFmode, op0);
15902 op1 = gen_lowpart (V4SFmode, op1);
15903 emit_insn (move_unaligned (op0, op1));
15907 move_unaligned = gen_sse2_loadupd;
15908 else if (MEM_P (op0))
15909 move_unaligned = gen_sse2_storeupd;
15911 gcc_unreachable ();
15913 emit_insn (move_unaligned (op0, op1));
15916 ix86_avx256_split_vector_move_misalign (op0, op1);
15919 gcc_unreachable ();
15924 gcc_unreachable ();
15932 /* If we're optimizing for size, movups is the smallest. */
15933 if (optimize_insn_for_size_p ()
15934 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15936 op0 = gen_lowpart (V4SFmode, op0);
15937 op1 = gen_lowpart (V4SFmode, op1);
15938 emit_insn (gen_sse_loadups (op0, op1));
15942 /* ??? If we have typed data, then it would appear that using
15943 movdqu is the only way to get unaligned data loaded with integer type instructions. */
15945 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15947 op0 = gen_lowpart (V16QImode, op0);
15948 op1 = gen_lowpart (V16QImode, op1);
15949 emit_insn (gen_sse2_loaddqu (op0, op1));
15953 if (TARGET_SSE2 && mode == V2DFmode)
15957 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15959 op0 = gen_lowpart (V2DFmode, op0);
15960 op1 = gen_lowpart (V2DFmode, op1);
15961 emit_insn (gen_sse2_loadupd (op0, op1));
15965 /* When SSE registers are split into halves, we can avoid
15966 writing to the top half twice. */
15967 if (TARGET_SSE_SPLIT_REGS)
15969 emit_clobber (op0);
15974 /* ??? Not sure about the best option for the Intel chips.
15975 The following would seem to satisfy; the register is
15976 entirely cleared, breaking the dependency chain. We
15977 then store to the upper half, with a dependency depth
15978 of one. A rumor has it that Intel recommends two movsd
15979 followed by an unpacklpd, but this is unconfirmed. And
15980 given that the dependency depth of the unpacklpd would
15981 still be one, I'm not sure why this would be better. */
15982 zero = CONST0_RTX (V2DFmode);
15985 m = adjust_address (op1, DFmode, 0);
15986 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15987 m = adjust_address (op1, DFmode, 8);
15988 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15992 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15994 op0 = gen_lowpart (V4SFmode, op0);
15995 op1 = gen_lowpart (V4SFmode, op1);
15996 emit_insn (gen_sse_loadups (op0, op1));
16000 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
16001 emit_move_insn (op0, CONST0_RTX (mode));
16003 emit_clobber (op0);
16005 if (mode != V4SFmode)
16006 op0 = gen_lowpart (V4SFmode, op0);
16007 m = adjust_address (op1, V2SFmode, 0);
16008 emit_insn (gen_sse_loadlps (op0, op0, m));
16009 m = adjust_address (op1, V2SFmode, 8);
16010 emit_insn (gen_sse_loadhps (op0, op0, m));
16013 else if (MEM_P (op0))
16015 /* If we're optimizing for size, movups is the smallest. */
16016 if (optimize_insn_for_size_p ()
16017 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
16019 op0 = gen_lowpart (V4SFmode, op0);
16020 op1 = gen_lowpart (V4SFmode, op1);
16021 emit_insn (gen_sse_storeups (op0, op1));
16025 /* ??? Similar to above, only less clear because of quote
16026 typeless stores unquote. */
16027 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
16028 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16030 op0 = gen_lowpart (V16QImode, op0);
16031 op1 = gen_lowpart (V16QImode, op1);
16032 emit_insn (gen_sse2_storedqu (op0, op1));
16036 if (TARGET_SSE2 && mode == V2DFmode)
16038 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
16040 op0 = gen_lowpart (V2DFmode, op0);
16041 op1 = gen_lowpart (V2DFmode, op1);
16042 emit_insn (gen_sse2_storeupd (op0, op1));
16046 m = adjust_address (op0, DFmode, 0);
16047 emit_insn (gen_sse2_storelpd (m, op1));
16048 m = adjust_address (op0, DFmode, 8);
16049 emit_insn (gen_sse2_storehpd (m, op1));
16054 if (mode != V4SFmode)
16055 op1 = gen_lowpart (V4SFmode, op1);
16057 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
16059 op0 = gen_lowpart (V4SFmode, op0);
16060 emit_insn (gen_sse_storeups (op0, op1));
16064 m = adjust_address (op0, V2SFmode, 0);
16065 emit_insn (gen_sse_storelps (m, op1));
16066 m = adjust_address (op0, V2SFmode, 8);
16067 emit_insn (gen_sse_storehps (m, op1));
16072 gcc_unreachable ();
16075 /* Expand a push in MODE. This is some mode for which we do not support
16076 proper push instructions, at least from the registers that we expect
16077 the value to live in. */
16080 ix86_expand_push (enum machine_mode mode, rtx x)
16084 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
16085 GEN_INT (-GET_MODE_SIZE (mode)),
16086 stack_pointer_rtx, 1, OPTAB_DIRECT);
16087 if (tmp != stack_pointer_rtx)
16088 emit_move_insn (stack_pointer_rtx, tmp);
16090 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
16092 /* When we push an operand onto the stack, it has to be aligned at least
16093 at the function argument boundary. However since we don't have
16094 the argument type, we can't determine the actual argument boundary. */
16096 emit_move_insn (tmp, x);
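/* Editorial note: the expansion above is just an explicit
   stack-pointer adjustment followed by a store, roughly

       sub  $<size>, %sp-reg
       mov  <value>, (%sp-reg)

   which is why no push instruction proper is needed for such modes.  */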
16099 /* Helper function of ix86_fixup_binary_operands to canonicalize
16100 operand order. Returns true if the operands should be swapped. */
16103 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
16106 rtx dst = operands[0];
16107 rtx src1 = operands[1];
16108 rtx src2 = operands[2];
16110 /* If the operation is not commutative, we can't do anything. */
16111 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
16114 /* Highest priority is that src1 should match dst. */
16115 if (rtx_equal_p (dst, src1))
16117 if (rtx_equal_p (dst, src2))
16120 /* Next highest priority is that immediate constants come second. */
16121 if (immediate_operand (src2, mode))
16123 if (immediate_operand (src1, mode))
16126 /* Lowest priority is that memory references should come second. */
16136 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16137 destination to use for the operation. If different from the true
16138 destination in operands[0], a copy operation will be required. */
16141 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
16144 rtx dst = operands[0];
16145 rtx src1 = operands[1];
16146 rtx src2 = operands[2];
16148 /* Canonicalize operand order. */
16149 if (ix86_swap_binary_operands_p (code, mode, operands))
16153 /* It is invalid to swap operands of different modes. */
16154 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
16161 /* Both source operands cannot be in memory. */
16162 if (MEM_P (src1) && MEM_P (src2))
16164 /* Optimization: Only read from memory once. */
16165 if (rtx_equal_p (src1, src2))
16167 src2 = force_reg (mode, src2);
16171 src2 = force_reg (mode, src2);
16174 /* If the destination is memory, and we do not have matching source
16175 operands, do things in registers. */
16176 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16177 dst = gen_reg_rtx (mode);
16179 /* Source 1 cannot be a constant. */
16180 if (CONSTANT_P (src1))
16181 src1 = force_reg (mode, src1);
16183 /* Source 1 cannot be a non-matching memory. */
16184 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16185 src1 = force_reg (mode, src1);
16187 /* Improve address combine. */
16189 && GET_MODE_CLASS (mode) == MODE_INT
16191 src2 = force_reg (mode, src2);
16193 operands[1] = src1;
16194 operands[2] = src2;
16198 /* Similarly, but assume that the destination has already been
16199 set up properly. */
16202 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
16203 enum machine_mode mode, rtx operands[])
16205 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
16206 gcc_assert (dst == operands[0]);
16209 /* Attempt to expand a binary operator. Make the expansion closer to the
16210 actual machine than just general_operand, which would allow 3 separate
16211 memory references (one output, two input) in a single insn. */
16214 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
16217 rtx src1, src2, dst, op, clob;
16219 dst = ix86_fixup_binary_operands (code, mode, operands);
16220 src1 = operands[1];
16221 src2 = operands[2];
16223 /* Emit the instruction. */
16225 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
16226 if (reload_in_progress)
16228 /* Reload doesn't know about the flags register, and doesn't know that
16229 it doesn't want to clobber it. We can only do this with PLUS. */
16230 gcc_assert (code == PLUS);
16233 else if (reload_completed
16235 && !rtx_equal_p (dst, src1))
16237 /* This is going to be an LEA; avoid splitting it later. */
16242 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16243 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16246 /* Fix up the destination if needed. */
16247 if (dst != operands[0])
16248 emit_move_insn (operands[0], dst);
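/* Worked example (editorial note): expanding a = b + c with A in
   memory and B, C in registers: ix86_fixup_binary_operands returns a
   fresh register DST because the memory destination has no matching
   source, the PLUS is emitted as DST = B + C together with the
   FLAGS_REG clobber, and the final emit_move_insn stores DST back
   into A.  */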
16251 /* Return TRUE or FALSE depending on whether the binary operator meets the
16252 appropriate constraints. */
16255 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16258 rtx dst = operands[0];
16259 rtx src1 = operands[1];
16260 rtx src2 = operands[2];
16262 /* Both source operands cannot be in memory. */
16263 if (MEM_P (src1) && MEM_P (src2))
16266 /* Canonicalize operand order for commutative operators. */
16267 if (ix86_swap_binary_operands_p (code, mode, operands))
16274 /* If the destination is memory, we must have a matching source operand. */
16275 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16278 /* Source 1 cannot be a constant. */
16279 if (CONSTANT_P (src1))
16282 /* Source 1 cannot be a non-matching memory. */
16283 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16284 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16285 return (code == AND
16288 || (TARGET_64BIT && mode == DImode))
16289 && satisfies_constraint_L (src2));
16294 /* Attempt to expand a unary operator. Make the expansion closer to the
16295 actual machine than just general_operand, which would allow 2 separate
16296 memory references (one output, one input) in a single insn. */
16299 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16302 int matching_memory;
16303 rtx src, dst, op, clob;
16308 /* If the destination is memory, and we do not have matching source
16309 operands, do things in registers. */
16310 matching_memory = 0;
16313 if (rtx_equal_p (dst, src))
16314 matching_memory = 1;
16316 dst = gen_reg_rtx (mode);
16319 /* When source operand is memory, destination must match. */
16320 if (MEM_P (src) && !matching_memory)
16321 src = force_reg (mode, src);
16323 /* Emit the instruction. */
16325 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16326 if (reload_in_progress || code == NOT)
16328 /* Reload doesn't know about the flags register, and doesn't know that
16329 it doesn't want to clobber it. */
16330 gcc_assert (code == NOT);
16335 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16336 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16339 /* Fix up the destination if needed. */
16340 if (dst != operands[0])
16341 emit_move_insn (operands[0], dst);
16344 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16345 divisor are within the range [0-255]. */
16348 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16351 rtx end_label, qimode_label;
16352 rtx insn, div, mod;
16353 rtx scratch, tmp0, tmp1, tmp2;
16354 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16355 rtx (*gen_zero_extend) (rtx, rtx);
16356 rtx (*gen_test_ccno_1) (rtx, rtx);
16361 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16362 gen_test_ccno_1 = gen_testsi_ccno_1;
16363 gen_zero_extend = gen_zero_extendqisi2;
16366 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16367 gen_test_ccno_1 = gen_testdi_ccno_1;
16368 gen_zero_extend = gen_zero_extendqidi2;
16371 gcc_unreachable ();
16374 end_label = gen_label_rtx ();
16375 qimode_label = gen_label_rtx ();
16377 scratch = gen_reg_rtx (mode);
16379 /* Use 8bit unsigned divmod if dividend and divisor are within
16380 the range [0-255]. */
16381 emit_move_insn (scratch, operands[2]);
16382 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16383 scratch, 1, OPTAB_DIRECT);
16384 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16385 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16386 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16387 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16388 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16390 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16391 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16392 JUMP_LABEL (insn) = qimode_label;
16394 /* Generate original signed/unsigned divmod. */
16395 div = gen_divmod4_1 (operands[0], operands[1],
16396 operands[2], operands[3]);
16399 /* Branch to the end. */
16400 emit_jump_insn (gen_jump (end_label));
16403 /* Generate 8bit unsigned divide. */
16404 emit_label (qimode_label);
16405 /* Don't use operands[0] for result of 8bit divide since not all
16406 registers support QImode ZERO_EXTRACT. */
16407 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16408 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16409 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16410 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16414 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16415 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16419 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16420 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16423 /* Extract remainder from AH. */
16424 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16425 if (REG_P (operands[1]))
16426 insn = emit_move_insn (operands[1], tmp1);
16429 /* Need a new scratch register since the old one holds the result of the 8bit divide. */
16431 scratch = gen_reg_rtx (mode);
16432 emit_move_insn (scratch, tmp1);
16433 insn = emit_move_insn (operands[1], scratch);
16435 set_unique_reg_note (insn, REG_EQUAL, mod);
16437 /* Zero extend quotient from AL. */
16438 tmp1 = gen_lowpart (QImode, tmp0);
16439 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16440 set_unique_reg_note (insn, REG_EQUAL, div);
16442 emit_label (end_label);
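/* Standalone model of the split above (editorial sketch; plain C,
   unsigned 32-bit case, function name made up).  When both values fit
   in 8 bits the signed and unsigned results agree, which is why one
   unsigned byte divide serves both cases:  */
#if 0
static void
divmod_u32_model (unsigned int a, unsigned int b,
		  unsigned int *quot, unsigned int *rem)
{
  if (((a | b) & ~0xffu) == 0)
    {
      /* Fast path: byte-wide divide; in the emitted code AL receives
	 the quotient and AH the remainder.  */
      *quot = (unsigned char) a / (unsigned char) b;
      *rem = (unsigned char) a % (unsigned char) b;
    }
  else
    {
      /* Slow path: full-width divide.  */
      *quot = a / b;
      *rem = a % b;
    }
}
#endif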
16445 #define LEA_MAX_STALL (3)
16446 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16448 /* Increase given DISTANCE in half-cycles according to
16449 dependencies between PREV and NEXT instructions.
16450 Add 1 half-cycle if there is no dependency and
16451 go to the next cycle if there is some dependency. */
16453 static unsigned int
16454 increase_distance (rtx prev, rtx next, unsigned int distance)
16459 if (!prev || !next)
16460 return distance + (distance & 1) + 2;
16462 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
16463 return distance + 1;
16465 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16466 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16467 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
16468 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
16469 return distance + (distance & 1) + 2;
16471 return distance + 1;
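/* Worked example (editorial note): with DISTANCE = 3 half-cycles, a
   dependency between PREV and NEXT yields 3 + (3 & 1) + 2 = 6, i.e.
   the count is rounded up to a cycle boundary plus one full cycle,
   while the no-dependency case simply returns 3 + 1 = 4.  */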
16474 /* Function checks if instruction INSN defines register number
16475 REGNO1 or REGNO2. */
16478 insn_defines_reg (unsigned int regno1, unsigned int regno2,
16483 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
16484 if (DF_REF_REG_DEF_P (*def_rec)
16485 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16486 && (regno1 == DF_REF_REGNO (*def_rec)
16487 || regno2 == DF_REF_REGNO (*def_rec)))
16495 /* Function checks if instruction INSN uses register number
16496 REGNO as a part of address expression. */
16499 insn_uses_reg_mem (unsigned int regno, rtx insn)
16503 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
16504 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
16510 /* Search backward for non-agu definition of register number REGNO1
16511 or register number REGNO2 in basic block starting from instruction
16512 START up to head of basic block or instruction INSN.
16514 Function puts true value into *FOUND var if definition was found
16515 and false otherwise.
16517 Distance in half-cycles between START and found instruction or head
16518 of BB is added to DISTANCE and returned. */
16521 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
16522 rtx insn, int distance,
16523 rtx start, bool *found)
16525 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16533 && distance < LEA_SEARCH_THRESHOLD)
16535 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
16537 distance = increase_distance (prev, next, distance);
16538 if (insn_defines_reg (regno1, regno2, prev))
16540 if (recog_memoized (prev) < 0
16541 || get_attr_type (prev) != TYPE_LEA)
16550 if (prev == BB_HEAD (bb))
16553 prev = PREV_INSN (prev);
16559 /* Search backward for non-agu definition of register number REGNO1
16560 or register number REGNO2 in INSN's basic block until
16561 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16562 2. Reach neighbour BBs boundary, or
16563 3. Reach agu definition.
16564 Returns the distance between the non-agu definition point and INSN.
16565 If no definition point, returns -1. */
16568 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16571 basic_block bb = BLOCK_FOR_INSN (insn);
16573 bool found = false;
16575 if (insn != BB_HEAD (bb))
16576 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16577 distance, PREV_INSN (insn),
16580 if (!found && distance < LEA_SEARCH_THRESHOLD)
16584 bool simple_loop = false;
16586 FOR_EACH_EDGE (e, ei, bb->preds)
16589 simple_loop = true;
16594 distance = distance_non_agu_define_in_bb (regno1, regno2,
16596 BB_END (bb), &found);
16599 int shortest_dist = -1;
16600 bool found_in_bb = false;
16602 FOR_EACH_EDGE (e, ei, bb->preds)
16605 = distance_non_agu_define_in_bb (regno1, regno2,
16611 if (shortest_dist < 0)
16612 shortest_dist = bb_dist;
16613 else if (bb_dist > 0)
16614 shortest_dist = MIN (bb_dist, shortest_dist);
16620 distance = shortest_dist;
16624 /* get_attr_type may modify recog data. We want to make sure
16625 that recog data is valid for instruction INSN, on which
16626 distance_non_agu_define is called. INSN is unchanged here. */
16627 extract_insn_cached (insn);
16632 return distance >> 1;
16635 /* Return the distance in half-cycles between INSN and the next
16636 insn that uses register number REGNO in a memory address, added
16637 to DISTANCE. Return -1 if REGNO is set.
16639 Put a true value into *FOUND if a register use was found, false otherwise.
16641 Put a true value into *REDEFINED if a register redefinition was
16642 found and false otherwise. */
16645 distance_agu_use_in_bb (unsigned int regno,
16646 rtx insn, int distance, rtx start,
16647 bool *found, bool *redefined)
16649 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16654 *redefined = false;
16658 && distance < LEA_SEARCH_THRESHOLD)
16660 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
16662 distance = increase_distance (prev, next, distance);
16663 if (insn_uses_reg_mem (regno, next))
16665 /* Return DISTANCE if OP0 is used in memory
16666 address in NEXT. */
16671 if (insn_defines_reg (regno, INVALID_REGNUM, next))
16673 /* Return -1 if OP0 is set in NEXT. */
16681 if (next == BB_END (bb))
16684 next = NEXT_INSN (next);
16690 /* Return the distance between INSN and the next insn that uses
16691 register number REGNO0 in a memory address. Return -1 if no such
16692 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16695 distance_agu_use (unsigned int regno0, rtx insn)
16697 basic_block bb = BLOCK_FOR_INSN (insn);
16699 bool found = false;
16700 bool redefined = false;
16702 if (insn != BB_END (bb))
16703 distance = distance_agu_use_in_bb (regno0, insn, distance,
16705 &found, &redefined);
16707 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16711 bool simple_loop = false;
16713 FOR_EACH_EDGE (e, ei, bb->succs)
16716 simple_loop = true;
16721 distance = distance_agu_use_in_bb (regno0, insn,
16722 distance, BB_HEAD (bb),
16723 &found, &redefined);
16726 int shortest_dist = -1;
16727 bool found_in_bb = false;
16728 bool redefined_in_bb = false;
16730 FOR_EACH_EDGE (e, ei, bb->succs)
16733 = distance_agu_use_in_bb (regno0, insn,
16734 distance, BB_HEAD (e->dest),
16735 &found_in_bb, &redefined_in_bb);
16738 if (shortest_dist < 0)
16739 shortest_dist = bb_dist;
16740 else if (bb_dist > 0)
16741 shortest_dist = MIN (bb_dist, shortest_dist);
16747 distance = shortest_dist;
16751 if (!found || redefined)
16754 return distance >> 1;
16757 /* Define this macro to tune LEA priority vs ADD; it takes effect when
16758 there is a dilemma of choosing LEA or ADD.
16759 Negative value: ADD is preferred over LEA.
16761 Positive value: LEA is preferred over ADD. */
16762 #define IX86_LEA_PRIORITY 0
16764 /* Return true if usage of lea INSN has a performance advantage
16765 over a sequence of instructions. The instruction sequence has
16766 SPLIT_COST cycles higher latency than the lea latency. */
16769 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
16770 unsigned int regno2, int split_cost)
16772 int dist_define, dist_use;
16774 dist_define = distance_non_agu_define (regno1, regno2, insn);
16775 dist_use = distance_agu_use (regno0, insn);
16777 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
16779 /* If there is no non-AGU operand definition, no AGU
16780 operand usage and the split cost is 0, then both the lea
16781 and non-lea variants have the same priority. Currently
16782 we prefer lea for 64 bit code and non-lea for 32 bit code. */
16784 if (dist_use < 0 && split_cost == 0)
16785 return TARGET_64BIT || IX86_LEA_PRIORITY;
16790 /* With a longer definition distance, lea is preferable.
16791 Here we change it to take into account the splitting cost and lea priority. */
16793 dist_define += split_cost + IX86_LEA_PRIORITY;
16795 /* If there is no use in a memory address then we just check
16796 that the split cost does not exceed the AGU stall. */
16798 return dist_define >= LEA_MAX_STALL;
16800 /* If this insn has both backward non-agu dependence and forward
16801 agu dependence, the one with the shorter distance takes effect. */
16802 return dist_define >= dist_use;
16805 /* Return true if it is legal to clobber flags by INSN and
16806 false otherwise. */
16809 ix86_ok_to_clobber_flags (rtx insn)
16811 basic_block bb = BLOCK_FOR_INSN (insn);
16817 if (NONDEBUG_INSN_P (insn))
16819 for (use = DF_INSN_USES (insn); *use; use++)
16820 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
16823 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
16827 if (insn == BB_END (bb))
16830 insn = NEXT_INSN (insn);
16833 live = df_get_live_out (bb);
16834 return !REGNO_REG_SET_P (live, FLAGS_REG);
16837 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16838 move and add to avoid AGU stalls. */
16841 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
16843 unsigned int regno0 = true_regnum (operands[0]);
16844 unsigned int regno1 = true_regnum (operands[1]);
16845 unsigned int regno2 = true_regnum (operands[2]);
16847 /* Check if we need to optimize. */
16848 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16851 /* Check it is correct to split here. */
16852 if (!ix86_ok_to_clobber_flags (insn))
16855 /* We need to split only adds with a non-destructive
16856 destination operand. */
16857 if (regno0 == regno1 || regno0 == regno2)
16860 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
16863 /* Return true if we should emit an lea instruction instead of a mov instruction. */
16867 ix86_use_lea_for_mov (rtx insn, rtx operands[])
16869 unsigned int regno0;
16870 unsigned int regno1;
16872 /* Check if we need to optimize. */
16873 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16876 /* Use lea for reg to reg moves only. */
16877 if (!REG_P (operands[0]) || !REG_P (operands[1]))
16880 regno0 = true_regnum (operands[0]);
16881 regno1 = true_regnum (operands[1]);
16883 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0);
16886 /* Return true if we need to split lea into a sequence of
16887 instructions to avoid AGU stalls. */
16890 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
16892 unsigned int regno0 = true_regnum (operands[0]);
16893 unsigned int regno1 = INVALID_REGNUM;
16894 unsigned int regno2 = INVALID_REGNUM;
16895 int split_cost = 0;
16896 struct ix86_address parts;
16899 /* FIXME: Handle zero-extended addresses. */
16900 if (GET_CODE (operands[1]) == ZERO_EXTEND
16901 || GET_CODE (operands[1]) == AND)
16904 /* Check if we need to optimize. */
16905 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16908 /* Check it is correct to split here. */
16909 if (!ix86_ok_to_clobber_flags (insn))
16912 ok = ix86_decompose_address (operands[1], &parts);
16915 /* There should be at least two components in the address. */
16916 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
16917 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
16920 /* We should not split into add if a non-legitimate pic
16921 operand is used as the displacement. */
16922 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16926 regno1 = true_regnum (parts.base);
16928 regno2 = true_regnum (parts.index);
16930 /* Compute how many cycles we will add to execution time
16931 if we split lea into a sequence of instructions. */
16932 if (parts.base || parts.index)
16934 /* Have to use a mov instruction if the non-destructive
16935 destination form is used. */
16936 if (regno1 != regno0 && regno2 != regno0)
16939 /* Have to add index to base if both exist. */
16940 if (parts.base && parts.index)
16943 /* Have to use shift and adds if scale is 2 or greater. */
16944 if (parts.scale > 1)
16946 if (regno0 != regno1)
16948 else if (regno2 == regno0)
16951 split_cost += parts.scale;
16954 /* Have to use add instruction with immediate if
16955 disp is non zero. */
16956 if (parts.disp && parts.disp != const0_rtx)
16959 /* Subtract the price of lea. */
16963 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
16966 /* Emit x86 binary operand CODE in mode MODE, where the first operand
16967 matches destination. RTX includes clobber of FLAGS_REG. */
16970 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
16975 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
16976 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16978 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16981 /* Split lea instructions into a sequence of instructions
16982 which are executed on the ALU to avoid AGU stalls.
16983 It is assumed that it is allowed to clobber the flags register
16984 at the lea position. */
16987 ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
16989 unsigned int regno0 = true_regnum (operands[0]);
16990 unsigned int regno1 = INVALID_REGNUM;
16991 unsigned int regno2 = INVALID_REGNUM;
16992 struct ix86_address parts;
16996 ok = ix86_decompose_address (operands[1], &parts);
17001 if (GET_MODE (parts.base) != mode)
17002 parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
17003 regno1 = true_regnum (parts.base);
17008 if (GET_MODE (parts.index) != mode)
17009 parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
17010 regno2 = true_regnum (parts.index);
17013 if (parts.scale > 1)
17015 /* Case r1 = r1 + ... */
17016 if (regno1 == regno0)
17018 /* If we have a case r1 = r1 + C * r1 then we
17019 should use multiplication which is very
17020 expensive. Assume the cost model is wrong if we
17021 have such a case here. */
17022 gcc_assert (regno2 != regno0);
17024 for (adds = parts.scale; adds > 0; adds--)
17025 ix86_emit_binop (PLUS, mode, operands[0], parts.index);
17029 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17030 if (regno0 != regno2)
17031 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
17033 /* Use shift for scaling. */
17034 ix86_emit_binop (ASHIFT, mode, operands[0],
17035 GEN_INT (exact_log2 (parts.scale)));
17038 ix86_emit_binop (PLUS, mode, operands[0], parts.base);
17040 if (parts.disp && parts.disp != const0_rtx)
17041 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
17044 else if (!parts.base && !parts.index)
17046 gcc_assert (parts.disp);
17047 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
17053 if (regno0 != regno2)
17054 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
17056 else if (!parts.index)
17058 if (regno0 != regno1)
17059 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
17063 if (regno0 == regno1)
17065 else if (regno0 == regno2)
17069 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
17073 ix86_emit_binop (PLUS, mode, operands[0], tmp);
17076 if (parts.disp && parts.disp != const0_rtx)
17077 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
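/* Worked example (editorial note): splitting
   "lea 4(%ebx,%esi,4), %eax" with the code above yields

       movl  %esi, %eax      # regno0 != regno2
       sall  $2, %eax        # shift replaces the scale
       addl  %ebx, %eax      # add in the base
       addl  $4, %eax        # add in the displacement

   each step of which executes on the ALU rather than the AGU.  */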
17081 /* Return true if it is ok to optimize an ADD operation to LEA
17082 operation to avoid flag register consumption. For most processors,
17083 ADD is faster than LEA. For processors like ATOM, if the
17084 destination register of LEA holds an actual address which will be
17085 used soon, LEA is better; otherwise ADD is better. */
17088 ix86_lea_for_add_ok (rtx insn, rtx operands[])
17090 unsigned int regno0 = true_regnum (operands[0]);
17091 unsigned int regno1 = true_regnum (operands[1]);
17092 unsigned int regno2 = true_regnum (operands[2]);
17094 /* If a = b + c, (a != b && a != c), we must use the lea form. */
17095 if (regno0 != regno1 && regno0 != regno2)
17098 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17101 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
17104 /* Return true if the destination reg of SET_BODY is the shift count of USE_BODY. */
17108 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
17114 /* Retrieve destination of SET_BODY. */
17115 switch (GET_CODE (set_body))
17118 set_dest = SET_DEST (set_body);
17119 if (!set_dest || !REG_P (set_dest))
17123 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
17124 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
17132 /* Retrieve shift count of USE_BODY. */
17133 switch (GET_CODE (use_body))
17136 shift_rtx = XEXP (use_body, 1);
17139 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
17140 if (ix86_dep_by_shift_count_body (set_body,
17141 XVECEXP (use_body, 0, i)))
17149 && (GET_CODE (shift_rtx) == ASHIFT
17150 || GET_CODE (shift_rtx) == LSHIFTRT
17151 || GET_CODE (shift_rtx) == ASHIFTRT
17152 || GET_CODE (shift_rtx) == ROTATE
17153 || GET_CODE (shift_rtx) == ROTATERT))
17155 rtx shift_count = XEXP (shift_rtx, 1);
17157 /* Return true if shift count is dest of SET_BODY. */
17158 if (REG_P (shift_count)
17159 && true_regnum (set_dest) == true_regnum (shift_count))
17166 /* Return true if the destination reg of SET_INSN is the shift count of USE_INSN. */
17170 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
17172 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
17173 PATTERN (use_insn));
17176 /* Return TRUE or FALSE depending on whether the unary operator meets the
17177 appropriate constraints. */
17180 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
17181 enum machine_mode mode ATTRIBUTE_UNUSED,
17182 rtx operands[2] ATTRIBUTE_UNUSED)
17184 /* If one of the operands is memory, source and destination must match. */
17185 if ((MEM_P (operands[0])
17186 || MEM_P (operands[1]))
17187 && ! rtx_equal_p (operands[0], operands[1]))
17192 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17193 are ok, keeping in mind the possible movddup alternative. */
17196 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
17198 if (MEM_P (operands[0]))
17199 return rtx_equal_p (operands[0], operands[1 + high]);
17200 if (MEM_P (operands[1]) && MEM_P (operands[2]))
17201 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
17205 /* Post-reload splitter for converting an SF or DFmode value in an
17206 SSE register into an unsigned SImode. */
17209 ix86_split_convert_uns_si_sse (rtx operands[])
17211 enum machine_mode vecmode;
17212 rtx value, large, zero_or_two31, input, two31, x;
17214 large = operands[1];
17215 zero_or_two31 = operands[2];
17216 input = operands[3];
17217 two31 = operands[4];
17218 vecmode = GET_MODE (large);
17219 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
17221 /* Load up the value into the low element. We must ensure that the other
17222 elements are valid floats -- zero is the easiest such value. */
17225 if (vecmode == V4SFmode)
17226 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
17228 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
17232 input = gen_rtx_REG (vecmode, REGNO (input));
17233 emit_move_insn (value, CONST0_RTX (vecmode));
17234 if (vecmode == V4SFmode)
17235 emit_insn (gen_sse_movss (value, value, input));
17237 emit_insn (gen_sse2_movsd (value, value, input));
17240 emit_move_insn (large, two31);
17241 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
17243 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
17244 emit_insn (gen_rtx_SET (VOIDmode, large, x));
17246 x = gen_rtx_AND (vecmode, zero_or_two31, large);
17247 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
17249 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
17250 emit_insn (gen_rtx_SET (VOIDmode, value, x));
17252 large = gen_rtx_REG (V4SImode, REGNO (large));
17253 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
17255 x = gen_rtx_REG (V4SImode, REGNO (value));
17256 if (vecmode == V4SFmode)
17257 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
17259 emit_insn (gen_sse2_cvttpd2dq (x, value));
17262 emit_insn (gen_xorv4si3 (value, value, large));
17265 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17266 Expects the 64-bit DImode to be supplied in a pair of integral
17267 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17268 -mfpmath=sse, !optimize_size only. */
17271 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
17273 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
17274 rtx int_xmm, fp_xmm;
17275 rtx biases, exponents;
17278 int_xmm = gen_reg_rtx (V4SImode);
17279 if (TARGET_INTER_UNIT_MOVES)
17280 emit_insn (gen_movdi_to_sse (int_xmm, input));
17281 else if (TARGET_SSE_SPLIT_REGS)
17283 emit_clobber (int_xmm);
17284 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
17288 x = gen_reg_rtx (V2DImode);
17289 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
17290 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
17293 x = gen_rtx_CONST_VECTOR (V4SImode,
17294 gen_rtvec (4, GEN_INT (0x43300000UL),
17295 GEN_INT (0x45300000UL),
17296 const0_rtx, const0_rtx));
17297 exponents = validize_mem (force_const_mem (V4SImode, x));
17299 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17300 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
17302 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17303 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17304 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17305 (0x1.0p84 + double(fp_value_hi_xmm)).
17306 Note these exponents differ by 32. */
17308 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
17310 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17311 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17312 real_ldexp (&bias_lo_rvt, &dconst1, 52);
17313 real_ldexp (&bias_hi_rvt, &dconst1, 84);
17314 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
17315 x = const_double_from_real_value (bias_hi_rvt, DFmode);
17316 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
17317 biases = validize_mem (force_const_mem (V2DFmode, biases));
17318 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
17320 /* Add the upper and lower DFmode values together. */
17322 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
17325 x = copy_to_mode_reg (V2DFmode, fp_xmm);
17326 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
17327 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
17330 ix86_expand_vector_extract (false, target, fp_xmm, 0);
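/* Standalone model of the exponent-bias trick above (editorial
   sketch; assumes little-endian IEEE doubles, C99 hex-float literals
   and memcpy from <string.h>):  */
#if 0
static double
u64_to_double_model (unsigned long long x)
{
  double lo, hi;
  /* 0x1.0p52 has bit pattern 0x4330000000000000; OR-ing the low 32
     bits of X into its mantissa gives exactly 0x1.0p52 + low32.  */
  unsigned long long lo_bits = (x & 0xffffffffULL) | 0x4330000000000000ULL;
  /* Likewise 0x1.0p84 (0x4530000000000000) for the high half.  */
  unsigned long long hi_bits = (x >> 32) | 0x4530000000000000ULL;
  memcpy (&lo, &lo_bits, sizeof lo);
  memcpy (&hi, &hi_bits, sizeof hi);
  /* Subtract the biases and sum the halves, as the vector code does
     with subv2df3 followed by the final add/hadd.  */
  return (hi - 0x1.0p84) + (lo - 0x1.0p52);
}
#endif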
17333 /* Not used, but eases macroization of patterns. */
17335 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
17336 rtx input ATTRIBUTE_UNUSED)
17338 gcc_unreachable ();
17341 /* Convert an unsigned SImode value into a DFmode. Only currently used
17342 for SSE, but applicable anywhere. */
17345 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
17347 REAL_VALUE_TYPE TWO31r;
17350 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
17351 NULL, 1, OPTAB_DIRECT);
17353 fp = gen_reg_rtx (DFmode);
17354 emit_insn (gen_floatsidf2 (fp, x));
17356 real_ldexp (&TWO31r, &dconst1, 31);
17357 x = const_double_from_real_value (TWO31r, DFmode);
17359 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
17361 emit_move_insn (target, x);
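/* Standalone model of the wrap-around trick above (editorial sketch):
   adding -2^31 maps [0, 2^32) into the signed SImode range, the
   signed int -> double conversion is exact, and adding 0x1.0p31 back
   recovers the original unsigned value:  */
#if 0
static double
u32_to_double_model (unsigned int x)
{
  int shifted = (int) (x + 0x80000000u);  /* x - 2^31 with signed wrap */
  return (double) shifted + 0x1.0p31;
}
#endif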
17364 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17365 32-bit mode; otherwise we have a direct convert instruction. */
17368 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
17370 REAL_VALUE_TYPE TWO32r;
17371 rtx fp_lo, fp_hi, x;
17373 fp_lo = gen_reg_rtx (DFmode);
17374 fp_hi = gen_reg_rtx (DFmode);
17376 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
17378 real_ldexp (&TWO32r, &dconst1, 32);
17379 x = const_double_from_real_value (TWO32r, DFmode);
17380 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
17382 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
17384 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
17387 emit_move_insn (target, x);
17390 /* Convert an unsigned SImode value into SFmode, using only SSE.
17391 For x86_32, -mfpmath=sse, !optimize_size only. */
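/* Illustrative scalar equivalent of the sequence below (a sketch, not
   part of the original source):

     float uns32_to_float (uint32_t x)
     {
       return (float) (x >> 16) * 0x1.0p16f + (float) (x & 0xffff);
     }

   Each 16-bit half converts exactly (it fits in SFmode's 24-bit
   significand), the multiply by 0x1.0p16f is exact, and only the final
   addition rounds.  */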
17393 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
17395 REAL_VALUE_TYPE ONE16r;
17396 rtx fp_hi, fp_lo, int_hi, int_lo, x;
17398 real_ldexp (&ONE16r, &dconst1, 16);
17399 x = const_double_from_real_value (ONE16r, SFmode);
17400 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
17401 NULL, 0, OPTAB_DIRECT);
17402 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
17403 NULL, 0, OPTAB_DIRECT);
17404 fp_hi = gen_reg_rtx (SFmode);
17405 fp_lo = gen_reg_rtx (SFmode);
17406 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
17407 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
17408 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
17410 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
17412 if (!rtx_equal_p (target, fp_hi))
17413 emit_move_insn (target, fp_hi);
17416 /* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
17417 a vector of unsigned ints VAL to a vector of floats TARGET. */
17420 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
17423 REAL_VALUE_TYPE TWO16r;
17424 enum machine_mode intmode = GET_MODE (val);
17425 enum machine_mode fltmode = GET_MODE (target);
17426 rtx (*cvt) (rtx, rtx);
17428 if (intmode == V4SImode)
17429 cvt = gen_floatv4siv4sf2;
17431 cvt = gen_floatv8siv8sf2;
17432 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
17433 tmp[0] = force_reg (intmode, tmp[0]);
17434 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
17436 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
17437 NULL_RTX, 1, OPTAB_DIRECT);
17438 tmp[3] = gen_reg_rtx (fltmode);
17439 emit_insn (cvt (tmp[3], tmp[1]));
17440 tmp[4] = gen_reg_rtx (fltmode);
17441 emit_insn (cvt (tmp[4], tmp[2]));
17442 real_ldexp (&TWO16r, &dconst1, 16);
17443 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
17444 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
17445 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
17447 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
17449 if (tmp[7] != target)
17450 emit_move_insn (target, tmp[7]);
17453 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17454 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17455 This is done by doing just a signed conversion when VAL < 0x1p31, and
17456 otherwise by subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
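/* An illustrative scalar rendering of this adjustment (a sketch under
   the stated assumptions, not part of the original source):

     uint32_t ufix_trunc (double v)
     {
       if (v < 0x1.0p31)
         return (uint32_t) (int32_t) v;
       return ((uint32_t) (int32_t) (v - 0x1.0p31)) ^ 0x80000000u;
     }

   The vector code below computes both arms branchlessly: it subtracts
   0x1.0p31 only in lanes where VAL >= 0x1.0p31, and hands back in *XORP
   the 0x80000000 to be XORed into exactly those lanes afterwards.  */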
17459 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
17461 REAL_VALUE_TYPE TWO31r;
17462 rtx two31r, tmp[4];
17463 enum machine_mode mode = GET_MODE (val);
17464 enum machine_mode scalarmode = GET_MODE_INNER (mode);
17465 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
17466 rtx (*cmp) (rtx, rtx, rtx, rtx);
17469 for (i = 0; i < 3; i++)
17470 tmp[i] = gen_reg_rtx (mode);
17471 real_ldexp (&TWO31r, &dconst1, 31);
17472 two31r = const_double_from_real_value (TWO31r, scalarmode);
17473 two31r = ix86_build_const_vector (mode, 1, two31r);
17474 two31r = force_reg (mode, two31r);
17477 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
17478 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
17479 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
17480 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
17481 default: gcc_unreachable ();
17483 tmp[3] = gen_rtx_LE (mode, two31r, val);
17484 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
17485 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
17487 if (intmode == V4SImode || TARGET_AVX2)
17488 *xorp = expand_simple_binop (intmode, ASHIFT,
17489 gen_lowpart (intmode, tmp[0]),
17490 GEN_INT (31), NULL_RTX, 0,
17494 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
17495 two31 = ix86_build_const_vector (intmode, 1, two31);
17496 *xorp = expand_simple_binop (intmode, AND,
17497 gen_lowpart (intmode, tmp[0]),
17498 two31, NULL_RTX, 0,
17501 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
17505 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17506 then replicate the value for all elements of the vector register. */
17510 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
17514 enum machine_mode scalar_mode;
17531 n_elt = GET_MODE_NUNITS (mode);
17532 v = rtvec_alloc (n_elt);
17533 scalar_mode = GET_MODE_INNER (mode);
17535 RTVEC_ELT (v, 0) = value;
17537 for (i = 1; i < n_elt; ++i)
17538 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
17540 return gen_rtx_CONST_VECTOR (mode, v);
17543 gcc_unreachable ();
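/* Usage sketch for the routine above (illustrative):
   ix86_build_const_vector (V4SImode, true, x) yields
   (const_vector:V4SI [x x x x]), while with VECT false it yields
   (const_vector:V4SI [x 0 0 0]), i.e. only element 0 holds VALUE.  */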
17547 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17548 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17549 for an SSE register. If VECT is true, then replicate the mask for
17550 all elements of the vector register. If INVERT is true, then create
17551 a mask excluding the sign bit. */
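/* For example (illustrative): for DFmode the mask built below has the
   bit pattern 0x8000000000000000 (just the sign bit); with INVERT it is
   0x7fffffffffffffff.  ANDing with the inverted mask clears the sign
   (abs), XORing with the plain mask flips it (neg).  */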
17554 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
17556 enum machine_mode vec_mode, imode;
17557 HOST_WIDE_INT hi, lo;
17562 /* Find the sign bit, sign extended to 2*HWI. */
17570 mode = GET_MODE_INNER (mode);
17572 lo = 0x80000000, hi = lo < 0;
17580 mode = GET_MODE_INNER (mode);
17582 if (HOST_BITS_PER_WIDE_INT >= 64)
17583 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
17585 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17590 vec_mode = VOIDmode;
17591 if (HOST_BITS_PER_WIDE_INT >= 64)
17594 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
17601 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17605 lo = ~lo, hi = ~hi;
17611 mask = immed_double_const (lo, hi, imode);
17613 vec = gen_rtvec (2, v, mask);
17614 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
17615 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
17622 gcc_unreachable ();
17626 lo = ~lo, hi = ~hi;
17628 /* Force this value into the low part of a fp vector constant. */
17629 mask = immed_double_const (lo, hi, imode);
17630 mask = gen_lowpart (mode, mask);
17632 if (vec_mode == VOIDmode)
17633 return force_reg (mode, mask);
17635 v = ix86_build_const_vector (vec_mode, vect, mask);
17636 return force_reg (vec_mode, v);
17639 /* Generate code for floating point ABS or NEG. */
17642 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
17645 rtx mask, set, dst, src;
17646 bool use_sse = false;
17647 bool vector_mode = VECTOR_MODE_P (mode);
17648 enum machine_mode vmode = mode;
17652 else if (mode == TFmode)
17654 else if (TARGET_SSE_MATH)
17656 use_sse = SSE_FLOAT_MODE_P (mode);
17657 if (mode == SFmode)
17659 else if (mode == DFmode)
17663 /* NEG and ABS performed with SSE use bitwise mask operations.
17664 Create the appropriate mask now. */
17666 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
17673 set = gen_rtx_fmt_e (code, mode, src);
17674 set = gen_rtx_SET (VOIDmode, dst, set);
17681 use = gen_rtx_USE (VOIDmode, mask);
17683 par = gen_rtvec (2, set, use);
17686 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17687 par = gen_rtvec (3, set, use, clob);
17689 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
17695 /* Expand a copysign operation. Special case operand 0 being a constant. */
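/* The underlying bit identity (an illustrative scalar sketch, not part
   of the original source; bits () and bits_to_double () are hypothetical
   reinterpretations of an IEEE-754 DFmode value):

     copysign (x, y) == bits_to_double ((bits (x) & 0x7fffffffffffffffULL)
                                        | (bits (y) & 0x8000000000000000ULL))

   i.e. keep the magnitude bits of X and take the sign bit from Y; the
   expanders below do the same with vector-wide sign-bit masks.  */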
17698 ix86_expand_copysign (rtx operands[])
17700 enum machine_mode mode, vmode;
17701 rtx dest, op0, op1, mask, nmask;
17703 dest = operands[0];
17707 mode = GET_MODE (dest);
17709 if (mode == SFmode)
17711 else if (mode == DFmode)
17716 if (GET_CODE (op0) == CONST_DOUBLE)
17718 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
17720 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17721 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17723 if (mode == SFmode || mode == DFmode)
17725 if (op0 == CONST0_RTX (mode))
17726 op0 = CONST0_RTX (vmode);
17729 rtx v = ix86_build_const_vector (vmode, false, op0);
17731 op0 = force_reg (vmode, v);
17734 else if (op0 != CONST0_RTX (mode))
17735 op0 = force_reg (mode, op0);
17737 mask = ix86_build_signbit_mask (vmode, 0, 0);
17739 if (mode == SFmode)
17740 copysign_insn = gen_copysignsf3_const;
17741 else if (mode == DFmode)
17742 copysign_insn = gen_copysigndf3_const;
17744 copysign_insn = gen_copysigntf3_const;
17746 emit_insn (copysign_insn (dest, op0, op1, mask));
17750 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17752 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17753 mask = ix86_build_signbit_mask (vmode, 0, 0);
17755 if (mode == SFmode)
17756 copysign_insn = gen_copysignsf3_var;
17757 else if (mode == DFmode)
17758 copysign_insn = gen_copysigndf3_var;
17760 copysign_insn = gen_copysigntf3_var;
17762 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17766 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17767 be a constant, and so has already been expanded into a vector constant. */
17770 ix86_split_copysign_const (rtx operands[])
17772 enum machine_mode mode, vmode;
17773 rtx dest, op0, mask, x;
17775 dest = operands[0];
17777 mask = operands[3];
17779 mode = GET_MODE (dest);
17780 vmode = GET_MODE (mask);
17782 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17783 x = gen_rtx_AND (vmode, dest, mask);
17784 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17786 if (op0 != CONST0_RTX (vmode))
17788 x = gen_rtx_IOR (vmode, dest, op0);
17789 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17793 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17794 so we have to do two masks. */
17797 ix86_split_copysign_var (rtx operands[])
17799 enum machine_mode mode, vmode;
17800 rtx dest, scratch, op0, op1, mask, nmask, x;
17802 dest = operands[0];
17803 scratch = operands[1];
17806 nmask = operands[4];
17807 mask = operands[5];
17809 mode = GET_MODE (dest);
17810 vmode = GET_MODE (mask);
17812 if (rtx_equal_p (op0, op1))
17814 /* Shouldn't happen often (it's useless, obviously), but when it does
17815 we'd generate incorrect code if we continue below. */
17816 emit_move_insn (dest, op0);
17820 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17822 gcc_assert (REGNO (op1) == REGNO (scratch));
17824 x = gen_rtx_AND (vmode, scratch, mask);
17825 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17828 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17829 x = gen_rtx_NOT (vmode, dest);
17830 x = gen_rtx_AND (vmode, x, op0);
17831 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17835 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17837 x = gen_rtx_AND (vmode, scratch, mask);
17839 else /* alternative 2,4 */
17841 gcc_assert (REGNO (mask) == REGNO (scratch));
17842 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17843 x = gen_rtx_AND (vmode, scratch, op1);
17845 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17847 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17849 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17850 x = gen_rtx_AND (vmode, dest, nmask);
17852 else /* alternative 3,4 */
17854 gcc_assert (REGNO (nmask) == REGNO (dest));
17856 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17857 x = gen_rtx_AND (vmode, dest, op0);
17859 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17862 x = gen_rtx_IOR (vmode, dest, scratch);
17863 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17866 /* Return TRUE or FALSE depending on whether the first SET in INSN
17867 has source and destination with matching CC modes, and that the
17868 CC mode is at least as constrained as REQ_MODE. */
17871 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17874 enum machine_mode set_mode;
17876 set = PATTERN (insn);
17877 if (GET_CODE (set) == PARALLEL)
17878 set = XVECEXP (set, 0, 0);
17879 gcc_assert (GET_CODE (set) == SET);
17880 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17882 set_mode = GET_MODE (SET_DEST (set));
17886 if (req_mode != CCNOmode
17887 && (req_mode != CCmode
17888 || XEXP (SET_SRC (set), 1) != const0_rtx))
17892 if (req_mode == CCGCmode)
17896 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17900 if (req_mode == CCZmode)
17910 if (set_mode != req_mode)
17915 gcc_unreachable ();
17918 return GET_MODE (SET_SRC (set)) == set_mode;
17921 /* Generate insn patterns to do an integer compare of OPERANDS. */
17924 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17926 enum machine_mode cmpmode;
17929 cmpmode = SELECT_CC_MODE (code, op0, op1);
17930 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17932 /* This is very simple, but making the interface the same as in the
17933 FP case makes the rest of the code easier. */
17934 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17935 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17937 /* Return the test that should be put into the flags user, i.e.
17938 the bcc, scc, or cmov instruction. */
17939 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
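/* E.g. (illustrative) for a signed "a > b" this emits
   (set (reg:CCGC flags) (compare:CCGC a b)) and returns
   (gt (reg:CCGC flags) (const_int 0)), ready to feed a bcc, scc or
   cmov pattern.  */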
17942 /* Figure out whether to use ordered or unordered fp comparisons.
17943 Return the appropriate mode to use. */
17946 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17948 /* ??? In order to make all comparisons reversible, we do all comparisons
17949 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17950 all forms of trapping and nontrapping comparisons, we can make inequality
17951 comparisons trapping again, since it results in better code when using
17952 FCOM based compares. */
17953 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17957 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17959 enum machine_mode mode = GET_MODE (op0);
17961 if (SCALAR_FLOAT_MODE_P (mode))
17963 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17964 return ix86_fp_compare_mode (code);
17969 /* Only zero flag is needed. */
17970 case EQ: /* ZF=0 */
17971 case NE: /* ZF!=0 */
17973 /* Codes needing carry flag. */
17974 case GEU: /* CF=0 */
17975 case LTU: /* CF=1 */
17976 /* Detect overflow checks. They need just the carry flag. */
17977 if (GET_CODE (op0) == PLUS
17978 && rtx_equal_p (op1, XEXP (op0, 0)))
17982 case GTU: /* CF=0 & ZF=0 */
17983 case LEU: /* CF=1 | ZF=1 */
17984 /* Detect overflow checks. They need just the carry flag. */
17985 if (GET_CODE (op0) == MINUS
17986 && rtx_equal_p (op1, XEXP (op0, 0)))
17990 /* Codes possibly doable only with sign flag when
17991 comparing against zero. */
17992 case GE: /* SF=OF or SF=0 */
17993 case LT: /* SF<>OF or SF=1 */
17994 if (op1 == const0_rtx)
17997 /* For other cases Carry flag is not required. */
17999 /* Codes doable only with the sign flag when comparing
18000 against zero, but we lack a jump instruction for it,
18001 so we need to use relational tests against overflow,
18002 which thus needs to be zero. */
18003 case GT: /* ZF=0 & SF=OF */
18004 case LE: /* ZF=1 | SF<>OF */
18005 if (op1 == const0_rtx)
18009 /* strcmp patterns do (use flags) and combine may ask us for a proper mode. */
18014 gcc_unreachable ();
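/* Example of the overflow-check detection above (illustrative): for

     unsigned a, b;
     if (a + b < a)     an unsigned-overflow test

   combine hands us (ltu (plus a b) a), where op1 equals XEXP (op0, 0),
   and only the carry flag is needed to decide the branch.  */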
18018 /* Return the fixed registers used for condition codes. */
18021 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
18028 /* If two condition code modes are compatible, return a condition code
18029 mode which is compatible with both.  Otherwise, return VOIDmode. */
18032 static enum machine_mode
18033 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
18038 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
18041 if ((m1 == CCGCmode && m2 == CCGOCmode)
18042 || (m1 == CCGOCmode && m2 == CCGCmode))
18048 gcc_unreachable ();
18078 /* These are only compatible with themselves, which we already know. */
18085 /* Return a comparison we can do that is equivalent to
18086 swap_condition (code), except possibly for orderedness.
18087 But never change orderedness if TARGET_IEEE_FP, returning
18088 UNKNOWN in that case if necessary. */
18090 static enum rtx_code
18091 ix86_fp_swap_condition (enum rtx_code code)
18095 case GT: /* GTU - CF=0 & ZF=0 */
18096 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
18097 case GE: /* GEU - CF=0 */
18098 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
18099 case UNLT: /* LTU - CF=1 */
18100 return TARGET_IEEE_FP ? UNKNOWN : GT;
18101 case UNLE: /* LEU - CF=1 | ZF=1 */
18102 return TARGET_IEEE_FP ? UNKNOWN : GE;
18104 return swap_condition (code);
18108 /* Return the cost of comparison CODE using the best strategy for performance.
18109 All the following functions use the number of instructions as a cost metric.
18110 In the future this should be tweaked to compute bytes for optimize_size and
18111 to take into account the performance of various instructions on various CPUs. */
18114 ix86_fp_comparison_cost (enum rtx_code code)
18118 /* The cost of code using bit-twiddling on %ah. */
18135 arith_cost = TARGET_IEEE_FP ? 5 : 4;
18139 arith_cost = TARGET_IEEE_FP ? 6 : 4;
18142 gcc_unreachable ();
18145 switch (ix86_fp_comparison_strategy (code))
18147 case IX86_FPCMP_COMI:
18148 return arith_cost > 4 ? 3 : 2;
18149 case IX86_FPCMP_SAHF:
18150 return arith_cost > 4 ? 4 : 3;
18156 /* Return the strategy to use for a floating-point comparison.  We assume
18157 that fcomi is always preferable where available, since that is also true
18158 when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18160 enum ix86_fpcmp_strategy
18161 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
18163 /* Do fcomi/sahf based test when profitable. */
18166 return IX86_FPCMP_COMI;
18168 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
18169 return IX86_FPCMP_SAHF;
18171 return IX86_FPCMP_ARITH;
18174 /* Swap, force into registers, or otherwise massage the two operands
18175 to a fp comparison. The operands are updated in place; the new
18176 comparison code is returned. */
18178 static enum rtx_code
18179 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
18181 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
18182 rtx op0 = *pop0, op1 = *pop1;
18183 enum machine_mode op_mode = GET_MODE (op0);
18184 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
18186 /* All of the unordered compare instructions only work on registers.
18187 The same is true of the fcomi compare instructions. The XFmode
18188 compare instructions require registers except when comparing
18189 against zero or when converting operand 1 from fixed point to floating point. */
18193 && (fpcmp_mode == CCFPUmode
18194 || (op_mode == XFmode
18195 && ! (standard_80387_constant_p (op0) == 1
18196 || standard_80387_constant_p (op1) == 1)
18197 && GET_CODE (op1) != FLOAT)
18198 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
18200 op0 = force_reg (op_mode, op0);
18201 op1 = force_reg (op_mode, op1);
18205 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18206 things around if they appear profitable, otherwise force op0
18207 into a register. */
18209 if (standard_80387_constant_p (op0) == 0
18211 && ! (standard_80387_constant_p (op1) == 0
18214 enum rtx_code new_code = ix86_fp_swap_condition (code);
18215 if (new_code != UNKNOWN)
18218 tmp = op0, op0 = op1, op1 = tmp;
18224 op0 = force_reg (op_mode, op0);
18226 if (CONSTANT_P (op1))
18228 int tmp = standard_80387_constant_p (op1);
18230 op1 = validize_mem (force_const_mem (op_mode, op1));
18234 op1 = force_reg (op_mode, op1);
18237 op1 = force_reg (op_mode, op1);
18241 /* Try to rearrange the comparison to make it cheaper. */
18242 if (ix86_fp_comparison_cost (code)
18243 > ix86_fp_comparison_cost (swap_condition (code))
18244 && (REG_P (op1) || can_create_pseudo_p ()))
18247 tmp = op0, op0 = op1, op1 = tmp;
18248 code = swap_condition (code);
18250 op0 = force_reg (op_mode, op0);
18258 /* Convert comparison codes we use to represent FP comparison to integer
18259 code that will result in a proper branch.  Return UNKNOWN if no such code is available. */
18263 ix86_fp_compare_code_to_integer (enum rtx_code code)
18292 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18295 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
18297 enum machine_mode fpcmp_mode, intcmp_mode;
18300 fpcmp_mode = ix86_fp_compare_mode (code);
18301 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
18303 /* Do fcomi/sahf based test when profitable. */
18304 switch (ix86_fp_comparison_strategy (code))
18306 case IX86_FPCMP_COMI:
18307 intcmp_mode = fpcmp_mode;
18308 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18309 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18314 case IX86_FPCMP_SAHF:
18315 intcmp_mode = fpcmp_mode;
18316 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18317 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18321 scratch = gen_reg_rtx (HImode);
18322 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
18323 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
18326 case IX86_FPCMP_ARITH:
18327 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18328 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18329 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
18331 scratch = gen_reg_rtx (HImode);
18332 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
18334 /* In the unordered case, we have to check C2 for NaNs, which
18335 doesn't happen to work out to anything nice combination-wise.
18336 So do some bit twiddling on the value we've got in AH to come
18337 up with an appropriate set of condition codes. */
18339 intcmp_mode = CCNOmode;
18344 if (code == GT || !TARGET_IEEE_FP)
18346 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
18351 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18352 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18353 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
18354 intcmp_mode = CCmode;
18360 if (code == LT && TARGET_IEEE_FP)
18362 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18363 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
18364 intcmp_mode = CCmode;
18369 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
18375 if (code == GE || !TARGET_IEEE_FP)
18377 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
18382 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18383 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
18389 if (code == LE && TARGET_IEEE_FP)
18391 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18392 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18393 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18394 intcmp_mode = CCmode;
18399 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
18405 if (code == EQ && TARGET_IEEE_FP)
18407 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18408 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18409 intcmp_mode = CCmode;
18414 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
18420 if (code == NE && TARGET_IEEE_FP)
18422 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18423 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
18429 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
18435 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
18439 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
18444 gcc_unreachable ();
18452 /* Return the test that should be put into the flags user, i.e.
18453 the bcc, scc, or cmov instruction. */
18454 return gen_rtx_fmt_ee (code, VOIDmode,
18455 gen_rtx_REG (intcmp_mode, FLAGS_REG),
18460 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
18464 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
18465 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
18467 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
18469 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
18470 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18473 ret = ix86_expand_int_compare (code, op0, op1);
18479 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
18481 enum machine_mode mode = GET_MODE (op0);
18493 tmp = ix86_expand_compare (code, op0, op1);
18494 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18495 gen_rtx_LABEL_REF (VOIDmode, label),
18497 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18504 /* Expand DImode branch into multiple compare+branch. */
18506 rtx lo[2], hi[2], label2;
18507 enum rtx_code code1, code2, code3;
18508 enum machine_mode submode;
18510 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
18512 tmp = op0, op0 = op1, op1 = tmp;
18513 code = swap_condition (code);
18516 split_double_mode (mode, &op0, 1, lo+0, hi+0);
18517 split_double_mode (mode, &op1, 1, lo+1, hi+1);
18519 submode = mode == DImode ? SImode : DImode;
18521 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18522 avoid two branches. This costs one extra insn, so disable when
18523 optimizing for size. */
18525 if ((code == EQ || code == NE)
18526 && (!optimize_insn_for_size_p ()
18527 || hi[1] == const0_rtx || lo[1] == const0_rtx))
18532 if (hi[1] != const0_rtx)
18533 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
18534 NULL_RTX, 0, OPTAB_WIDEN);
18537 if (lo[1] != const0_rtx)
18538 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
18539 NULL_RTX, 0, OPTAB_WIDEN);
18541 tmp = expand_binop (submode, ior_optab, xor1, xor0,
18542 NULL_RTX, 0, OPTAB_WIDEN);
18544 ix86_expand_branch (code, tmp, const0_rtx, label);
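/* The identity used above (illustrative): (hi0 ^ hi1) | (lo0 ^ lo1)
   is zero exactly when hi0 == hi1 and lo0 == lo1, so a single compare
   of TMP against zero decides the double-word EQ/NE.  */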
18548 /* Otherwise, if we are doing less-than or greater-or-equal-than,
18549 op1 is a constant and the low word is zero, then we can just
18550 examine the high word. Similarly for low word -1 and
18551 less-or-equal-than or greater-than. */
18553 if (CONST_INT_P (hi[1]))
18556 case LT: case LTU: case GE: case GEU:
18557 if (lo[1] == const0_rtx)
18559 ix86_expand_branch (code, hi[0], hi[1], label);
18563 case LE: case LEU: case GT: case GTU:
18564 if (lo[1] == constm1_rtx)
18566 ix86_expand_branch (code, hi[0], hi[1], label);
18574 /* Otherwise, we need two or three jumps. */
18576 label2 = gen_label_rtx ();
18579 code2 = swap_condition (code);
18580 code3 = unsigned_condition (code);
18584 case LT: case GT: case LTU: case GTU:
18587 case LE: code1 = LT; code2 = GT; break;
18588 case GE: code1 = GT; code2 = LT; break;
18589 case LEU: code1 = LTU; code2 = GTU; break;
18590 case GEU: code1 = GTU; code2 = LTU; break;
18592 case EQ: code1 = UNKNOWN; code2 = NE; break;
18593 case NE: code2 = UNKNOWN; break;
18596 gcc_unreachable ();
18601 * if (hi(a) < hi(b)) goto true;
18602 * if (hi(a) > hi(b)) goto false;
18603 * if (lo(a) < lo(b)) goto true;
18607 if (code1 != UNKNOWN)
18608 ix86_expand_branch (code1, hi[0], hi[1], label);
18609 if (code2 != UNKNOWN)
18610 ix86_expand_branch (code2, hi[0], hi[1], label2);
18612 ix86_expand_branch (code3, lo[0], lo[1], label);
18614 if (code2 != UNKNOWN)
18615 emit_label (label2);
18620 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
18625 /* Split branch based on floating point condition. */
18627 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18628 rtx target1, rtx target2, rtx tmp, rtx pushed)
18633 if (target2 != pc_rtx)
18636 code = reverse_condition_maybe_unordered (code);
18641 condition = ix86_expand_fp_compare (code, op1, op2,
18644 /* Remove pushed operand from stack. */
18646 ix86_free_from_memory (GET_MODE (pushed));
18648 i = emit_jump_insn (gen_rtx_SET
18650 gen_rtx_IF_THEN_ELSE (VOIDmode,
18651 condition, target1, target2)));
18652 if (split_branch_probability >= 0)
18653 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
18657 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18661 gcc_assert (GET_MODE (dest) == QImode);
18663 ret = ix86_expand_compare (code, op0, op1);
18664 PUT_MODE (ret, QImode);
18665 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
18668 /* Expand a comparison setting or clearing the carry flag.  Return true
18669 when successful and set *POP to the resulting operation. */
18671 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18673 enum machine_mode mode =
18674 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18676 /* Do not handle double-mode compares that go through the special path. */
18677 if (mode == (TARGET_64BIT ? TImode : DImode))
18680 if (SCALAR_FLOAT_MODE_P (mode))
18682 rtx compare_op, compare_seq;
18684 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18686 /* Shortcut: the following common codes never translate
18687 into carry flag compares. */
18688 if (code == EQ || code == NE || code == UNEQ || code == LTGT
18689 || code == ORDERED || code == UNORDERED)
18692 /* These comparisons require the zero flag; swap operands so they don't. */
18693 if ((code == GT || code == UNLE || code == LE || code == UNGT)
18694 && !TARGET_IEEE_FP)
18699 code = swap_condition (code);
18702 /* Try to expand the comparison and verify that we end up with
18703 a carry flag based comparison.  This fails only when we decide
18704 to expand the comparison using arithmetic, which is not a very
18705 common scenario. */
18707 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18708 compare_seq = get_insns ();
18711 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18712 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18713 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18715 code = GET_CODE (compare_op);
18717 if (code != LTU && code != GEU)
18720 emit_insn (compare_seq);
18725 if (!INTEGRAL_MODE_P (mode))
18734 /* Convert a==0 into (unsigned)a<1. */
18737 if (op1 != const0_rtx)
18740 code = (code == EQ ? LTU : GEU);
18743 /* Convert a>b into b<a or a>=b+1. */
18746 if (CONST_INT_P (op1))
18748 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18749 /* Bail out on overflow.  We can still swap the operands, but that
18750 would force loading of the constant into a register. */
18751 if (op1 == const0_rtx
18752 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18754 code = (code == GTU ? GEU : LTU);
18761 code = (code == GTU ? LTU : GEU);
18765 /* Convert a>=0 into (unsigned)a<0x80000000. */
18768 if (mode == DImode || op1 != const0_rtx)
18770 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18771 code = (code == LT ? GEU : LTU);
18775 if (mode == DImode || op1 != constm1_rtx)
18777 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18778 code = (code == LE ? GEU : LTU);
18784 /* Swapping operands may cause the constant to appear as the first operand. */
18785 if (!nonimmediate_operand (op0, VOIDmode))
18787 if (!can_create_pseudo_p ())
18789 op0 = force_reg (mode, op0);
18791 *pop = ix86_expand_compare (code, op0, op1);
18792 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
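/* Worked examples of the rewrites performed above (illustrative):
     a == 0    becomes  (unsigned) a < 1             LTU
     a >u 5    becomes  (unsigned) a >= 6            GEU
     a >= 0    becomes  (unsigned) a < 0x80000000    LTU
   after which the expanded comparison only needs the carry flag.  */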
18797 ix86_expand_int_movcc (rtx operands[])
18799 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18800 rtx compare_seq, compare_op;
18801 enum machine_mode mode = GET_MODE (operands[0]);
18802 bool sign_bit_compare_p = false;
18803 rtx op0 = XEXP (operands[1], 0);
18804 rtx op1 = XEXP (operands[1], 1);
18807 compare_op = ix86_expand_compare (code, op0, op1);
18808 compare_seq = get_insns ();
18811 compare_code = GET_CODE (compare_op);
18813 if ((op1 == const0_rtx && (code == GE || code == LT))
18814 || (op1 == constm1_rtx && (code == GT || code == LE)))
18815 sign_bit_compare_p = true;
18817 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18818 HImode insns, we'd be swallowed in word prefix ops. */
18820 if ((mode != HImode || TARGET_FAST_PREFIX)
18821 && (mode != (TARGET_64BIT ? TImode : DImode))
18822 && CONST_INT_P (operands[2])
18823 && CONST_INT_P (operands[3]))
18825 rtx out = operands[0];
18826 HOST_WIDE_INT ct = INTVAL (operands[2]);
18827 HOST_WIDE_INT cf = INTVAL (operands[3]);
18828 HOST_WIDE_INT diff;
18831 /* Sign bit compares are better done using shifts than by using sbb. */
18833 if (sign_bit_compare_p
18834 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18836 /* Detect overlap between destination and compare sources. */
18839 if (!sign_bit_compare_p)
18842 bool fpcmp = false;
18844 compare_code = GET_CODE (compare_op);
18846 flags = XEXP (compare_op, 0);
18848 if (GET_MODE (flags) == CCFPmode
18849 || GET_MODE (flags) == CCFPUmode)
18853 = ix86_fp_compare_code_to_integer (compare_code);
18856 /* To simplify the rest of the code, restrict to the GEU case. */
18857 if (compare_code == LTU)
18859 HOST_WIDE_INT tmp = ct;
18862 compare_code = reverse_condition (compare_code);
18863 code = reverse_condition (code);
18868 PUT_CODE (compare_op,
18869 reverse_condition_maybe_unordered
18870 (GET_CODE (compare_op)));
18872 PUT_CODE (compare_op,
18873 reverse_condition (GET_CODE (compare_op)));
18877 if (reg_overlap_mentioned_p (out, op0)
18878 || reg_overlap_mentioned_p (out, op1))
18879 tmp = gen_reg_rtx (mode);
18881 if (mode == DImode)
18882 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18884 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18885 flags, compare_op));
18889 if (code == GT || code == GE)
18890 code = reverse_condition (code);
18893 HOST_WIDE_INT tmp = ct;
18898 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18911 tmp = expand_simple_binop (mode, PLUS,
18913 copy_rtx (tmp), 1, OPTAB_DIRECT);
18924 tmp = expand_simple_binop (mode, IOR,
18926 copy_rtx (tmp), 1, OPTAB_DIRECT);
18928 else if (diff == -1 && ct)
18938 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18940 tmp = expand_simple_binop (mode, PLUS,
18941 copy_rtx (tmp), GEN_INT (cf),
18942 copy_rtx (tmp), 1, OPTAB_DIRECT);
18950 * andl cf - ct, dest
18960 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18963 tmp = expand_simple_binop (mode, AND,
18965 gen_int_mode (cf - ct, mode),
18966 copy_rtx (tmp), 1, OPTAB_DIRECT);
18968 tmp = expand_simple_binop (mode, PLUS,
18969 copy_rtx (tmp), GEN_INT (ct),
18970 copy_rtx (tmp), 1, OPTAB_DIRECT);
18973 if (!rtx_equal_p (tmp, out))
18974 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
18981 enum machine_mode cmp_mode = GET_MODE (op0);
18984 tmp = ct, ct = cf, cf = tmp;
18987 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18989 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18991 /* We may be reversing an unordered compare to a normal compare, which
18992 is not valid in general (we may convert a non-trapping condition
18993 to a trapping one); however, on i386 we currently emit all
18994 comparisons unordered. */
18995 compare_code = reverse_condition_maybe_unordered (compare_code);
18996 code = reverse_condition_maybe_unordered (code);
19000 compare_code = reverse_condition (compare_code);
19001 code = reverse_condition (code);
19005 compare_code = UNKNOWN;
19006 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
19007 && CONST_INT_P (op1))
19009 if (op1 == const0_rtx
19010 && (code == LT || code == GE))
19011 compare_code = code;
19012 else if (op1 == constm1_rtx)
19016 else if (code == GT)
19021 /* Optimize dest = (op0 < 0) ? -1 : cf. */
19022 if (compare_code != UNKNOWN
19023 && GET_MODE (op0) == GET_MODE (out)
19024 && (cf == -1 || ct == -1))
19026 /* If the lea code below could be used, only optimize
19027 if it results in a 2-insn sequence. */
19029 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
19030 || diff == 3 || diff == 5 || diff == 9)
19031 || (compare_code == LT && ct == -1)
19032 || (compare_code == GE && cf == -1))
19035 * notl op1 (if necessary)
19043 code = reverse_condition (code);
19046 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
19048 out = expand_simple_binop (mode, IOR,
19050 out, 1, OPTAB_DIRECT);
19051 if (out != operands[0])
19052 emit_move_insn (operands[0], out);
19059 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
19060 || diff == 3 || diff == 5 || diff == 9)
19061 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
19063 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
19069 * lea cf(dest*(ct-cf)),dest
19073 * This also catches the degenerate setcc-only case.
19079 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
19082 /* On x86_64 the lea instruction operates on Pmode, so we need
19083 to get the arithmetic done in the proper mode to match. */
19085 tmp = copy_rtx (out);
19089 out1 = copy_rtx (out);
19090 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
19094 tmp = gen_rtx_PLUS (mode, tmp, out1);
19100 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
19103 if (!rtx_equal_p (tmp, out))
19106 out = force_operand (tmp, copy_rtx (out));
19108 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
19110 if (!rtx_equal_p (out, operands[0]))
19111 emit_move_insn (operands[0], copy_rtx (out));
19117 * General case: Jumpful:
19118 * xorl dest,dest cmpl op1, op2
19119 * cmpl op1, op2 movl ct, dest
19120 * setcc dest jcc 1f
19121 * decl dest movl cf, dest
19122 * andl (cf-ct),dest 1:
19125 * Size 20. Size 14.
19127 * This is reasonably steep, but branch mispredict costs are
19128 * high on modern cpus, so consider failing only if optimizing for size.
19132 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
19133 && BRANCH_COST (optimize_insn_for_speed_p (),
19138 enum machine_mode cmp_mode = GET_MODE (op0);
19143 if (SCALAR_FLOAT_MODE_P (cmp_mode))
19145 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
19147 /* We may be reversing an unordered compare to a normal compare,
19148 which is not valid in general (we may convert a non-trapping
19149 condition to a trapping one); however, on i386 we currently
19150 emit all comparisons unordered. */
19151 code = reverse_condition_maybe_unordered (code);
19155 code = reverse_condition (code);
19156 if (compare_code != UNKNOWN)
19157 compare_code = reverse_condition (compare_code);
19161 if (compare_code != UNKNOWN)
19163 /* notl op1 (if needed)
19168 For x < 0 (resp. x <= -1) there will be no notl,
19169 so if possible swap the constants to get rid of the complement.
19171 True/false will be -1/0 while code below (store flag
19172 followed by decrement) is 0/-1, so the constants need
19173 to be exchanged once more. */
19175 if (compare_code == GE || !cf)
19177 code = reverse_condition (code);
19182 HOST_WIDE_INT tmp = cf;
19187 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
19191 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
19193 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
19195 copy_rtx (out), 1, OPTAB_DIRECT);
19198 out = expand_simple_binop (mode, AND, copy_rtx (out),
19199 gen_int_mode (cf - ct, mode),
19200 copy_rtx (out), 1, OPTAB_DIRECT);
19202 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
19203 copy_rtx (out), 1, OPTAB_DIRECT);
19204 if (!rtx_equal_p (out, operands[0]))
19205 emit_move_insn (operands[0], copy_rtx (out));
19211 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
19213 /* Try a few more things with specific constants and a variable. */
19216 rtx var, orig_out, out, tmp;
19218 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19221 /* If one of the two operands is an interesting constant, load a
19222 constant with the above and mask it in with a logical operation. */
19224 if (CONST_INT_P (operands[2]))
19227 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
19228 operands[3] = constm1_rtx, op = and_optab;
19229 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
19230 operands[3] = const0_rtx, op = ior_optab;
19234 else if (CONST_INT_P (operands[3]))
19237 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
19238 operands[2] = constm1_rtx, op = and_optab;
19239 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
19240 operands[2] = const0_rtx, op = ior_optab;
19247 orig_out = operands[0];
19248 tmp = gen_reg_rtx (mode);
19251 /* Recurse to get the constant loaded. */
19252 if (ix86_expand_int_movcc (operands) == 0)
19255 /* Mask in the interesting variable. */
19256 out = expand_binop (mode, op, var, tmp, orig_out, 0,
19258 if (!rtx_equal_p (out, orig_out))
19259 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
19265 * For comparison with above,
19275 if (! nonimmediate_operand (operands[2], mode))
19276 operands[2] = force_reg (mode, operands[2]);
19277 if (! nonimmediate_operand (operands[3], mode))
19278 operands[3] = force_reg (mode, operands[3]);
19280 if (! register_operand (operands[2], VOIDmode)
19282 || ! register_operand (operands[3], VOIDmode)))
19283 operands[2] = force_reg (mode, operands[2]);
19286 && ! register_operand (operands[3], VOIDmode))
19287 operands[3] = force_reg (mode, operands[3]);
19289 emit_insn (compare_seq);
19290 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19291 gen_rtx_IF_THEN_ELSE (mode,
19292 compare_op, operands[2],
19297 /* Swap, force into registers, or otherwise massage the two operands
19298 to an sse comparison with a mask result. Thus we differ a bit from
19299 ix86_prepare_fp_compare_args which expects to produce a flags result.
19301 The DEST operand exists to help determine whether to commute commutative
19302 operators. The POP0/POP1 operands are updated in place. The new
19303 comparison code is returned, or UNKNOWN if not implementable. */
19305 static enum rtx_code
19306 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
19307 rtx *pop0, rtx *pop1)
19315 /* AVX supports all the needed comparisons. */
19318 /* We have no LTGT as an operator. We could implement it with
19319 NE & ORDERED, but this requires an extra temporary. It's
19320 not clear that it's worth it. */
19327 /* These are supported directly. */
19334 /* AVX has 3-operand comparisons, no need to swap anything. */
19337 /* For commutative operators, try to canonicalize the destination
19338 operand to be first in the comparison - this helps reload to
19339 avoid extra moves. */
19340 if (!dest || !rtx_equal_p (dest, *pop1))
19348 /* These are not supported directly before AVX, and furthermore
19349 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19350 comparison operands to transform into something that is supported. */
19355 code = swap_condition (code);
19359 gcc_unreachable ();
19365 /* Detect conditional moves that exactly match min/max operational
19366 semantics. Note that this is IEEE safe, as long as we don't
19367 interchange the operands.
19369 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19370 and TRUE if the operation is successful and instructions are emitted. */
19373 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
19374 rtx cmp_op1, rtx if_true, rtx if_false)
19376 enum machine_mode mode;
19382 else if (code == UNGE)
19385 if_true = if_false;
19391 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
19393 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
19398 mode = GET_MODE (dest);
19400 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19401 but MODE may be a vector mode and thus not appropriate. */
19402 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
19404 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
19407 if_true = force_reg (mode, if_true);
19408 v = gen_rtvec (2, if_true, if_false);
19409 tmp = gen_rtx_UNSPEC (mode, v, u);
19413 code = is_min ? SMIN : SMAX;
19414 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
19417 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
19421 /* Expand an SSE vector comparison.  Return the register with the result. */
19424 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
19425 rtx op_true, rtx op_false)
19427 enum machine_mode mode = GET_MODE (dest);
19428 enum machine_mode cmp_mode = GET_MODE (cmp_op0);
19431 cmp_op0 = force_reg (cmp_mode, cmp_op0);
19432 if (!nonimmediate_operand (cmp_op1, cmp_mode))
19433 cmp_op1 = force_reg (cmp_mode, cmp_op1);
19436 || reg_overlap_mentioned_p (dest, op_true)
19437 || reg_overlap_mentioned_p (dest, op_false))
19438 dest = gen_reg_rtx (mode);
19440 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
19441 if (cmp_mode != mode)
19443 x = force_reg (cmp_mode, x);
19444 convert_move (dest, x, false);
19447 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19452 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19453 operations. This is used for both scalar and vector conditional moves. */
19456 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
19458 enum machine_mode mode = GET_MODE (dest);
19461 if (vector_all_ones_operand (op_true, mode)
19462 && rtx_equal_p (op_false, CONST0_RTX (mode)))
19464 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
19466 else if (op_false == CONST0_RTX (mode))
19468 op_true = force_reg (mode, op_true);
19469 x = gen_rtx_AND (mode, cmp, op_true);
19470 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19472 else if (op_true == CONST0_RTX (mode))
19474 op_false = force_reg (mode, op_false);
19475 x = gen_rtx_NOT (mode, cmp);
19476 x = gen_rtx_AND (mode, x, op_false);
19477 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19479 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
19481 op_false = force_reg (mode, op_false);
19482 x = gen_rtx_IOR (mode, cmp, op_false);
19483 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19485 else if (TARGET_XOP)
19487 op_true = force_reg (mode, op_true);
19489 if (!nonimmediate_operand (op_false, mode))
19490 op_false = force_reg (mode, op_false);
19492 emit_insn (gen_rtx_SET (mode, dest,
19493 gen_rtx_IF_THEN_ELSE (mode, cmp,
19499 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
19501 if (!nonimmediate_operand (op_true, mode))
19502 op_true = force_reg (mode, op_true);
19504 op_false = force_reg (mode, op_false);
19510 gen = gen_sse4_1_blendvps;
19514 gen = gen_sse4_1_blendvpd;
19522 gen = gen_sse4_1_pblendvb;
19523 dest = gen_lowpart (V16QImode, dest);
19524 op_false = gen_lowpart (V16QImode, op_false);
19525 op_true = gen_lowpart (V16QImode, op_true);
19526 cmp = gen_lowpart (V16QImode, cmp);
19531 gen = gen_avx_blendvps256;
19535 gen = gen_avx_blendvpd256;
19543 gen = gen_avx2_pblendvb;
19544 dest = gen_lowpart (V32QImode, dest);
19545 op_false = gen_lowpart (V32QImode, op_false);
19546 op_true = gen_lowpart (V32QImode, op_true);
19547 cmp = gen_lowpart (V32QImode, cmp);
19555 emit_insn (gen (dest, op_false, op_true, cmp));
19558 op_true = force_reg (mode, op_true);
19560 t2 = gen_reg_rtx (mode);
19562 t3 = gen_reg_rtx (mode);
19566 x = gen_rtx_AND (mode, op_true, cmp);
19567 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
19569 x = gen_rtx_NOT (mode, cmp);
19570 x = gen_rtx_AND (mode, x, op_false);
19571 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
19573 x = gen_rtx_IOR (mode, t3, t2);
19574 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
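/* I.e. the classic branchless select, dest = (cmp & op_true)
   | (~cmp & op_false), relying on the comparison result being
   all-ones or all-zeros in each element (illustrative summary).  */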
19579 /* Expand a floating-point conditional move. Return true if successful. */
19582 ix86_expand_fp_movcc (rtx operands[])
19584 enum machine_mode mode = GET_MODE (operands[0]);
19585 enum rtx_code code = GET_CODE (operands[1]);
19586 rtx tmp, compare_op;
19587 rtx op0 = XEXP (operands[1], 0);
19588 rtx op1 = XEXP (operands[1], 1);
19590 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
19592 enum machine_mode cmode;
19594 /* Since we have no cmove for SSE registers, don't force bad register
19595 allocation just to gain access to it.  Deny movcc when the
19596 comparison mode doesn't match the move mode. */
19597 cmode = GET_MODE (op0);
19598 if (cmode == VOIDmode)
19599 cmode = GET_MODE (op1);
19603 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
19604 if (code == UNKNOWN)
19607 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
19608 operands[2], operands[3]))
19611 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
19612 operands[2], operands[3]);
19613 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
19617 /* The floating point conditional move instructions don't directly
19618 support conditions resulting from a signed integer comparison. */
19620 compare_op = ix86_expand_compare (code, op0, op1);
19621 if (!fcmov_comparison_operator (compare_op, VOIDmode))
19623 tmp = gen_reg_rtx (QImode);
19624 ix86_expand_setcc (tmp, code, op0, op1);
19626 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
19629 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19630 gen_rtx_IF_THEN_ELSE (mode, compare_op,
19631 operands[2], operands[3])));
19636 /* Expand a floating-point vector conditional move; a vcond operation
19637 rather than a movcc operation. */
19640 ix86_expand_fp_vcond (rtx operands[])
19642 enum rtx_code code = GET_CODE (operands[3]);
19645 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
19646 &operands[4], &operands[5]);
19647 if (code == UNKNOWN)
19650 switch (GET_CODE (operands[3]))
19653 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
19654 operands[5], operands[0], operands[0]);
19655 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
19656 operands[5], operands[1], operands[2]);
19660 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
19661 operands[5], operands[0], operands[0]);
19662 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
19663 operands[5], operands[1], operands[2]);
19667 gcc_unreachable ();
19669 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
19671 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19675 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
19676 operands[5], operands[1], operands[2]))
19679 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
19680 operands[1], operands[2]);
19681 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19685 /* Expand a signed/unsigned integral vector conditional move. */
19688 ix86_expand_int_vcond (rtx operands[])
19690 enum machine_mode data_mode = GET_MODE (operands[0]);
19691 enum machine_mode mode = GET_MODE (operands[4]);
19692 enum rtx_code code = GET_CODE (operands[3]);
19693 bool negate = false;
19696 cop0 = operands[4];
19697 cop1 = operands[5];
19699 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
19700 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
19701 if ((code == LT || code == GE)
19702 && data_mode == mode
19703 && cop1 == CONST0_RTX (mode)
19704 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
19705 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
19706 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
19707 && (GET_MODE_SIZE (data_mode) == 16
19708 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
19710 rtx negop = operands[2 - (code == LT)];
19711 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
19712 if (negop == CONST1_RTX (data_mode))
19714 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
19715 operands[0], 1, OPTAB_DIRECT);
19716 if (res != operands[0])
19717 emit_move_insn (operands[0], res);
19720 else if (GET_MODE_INNER (data_mode) != DImode
19721 && vector_all_ones_operand (negop, data_mode))
19723 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
19724 operands[0], 0, OPTAB_DIRECT);
19725 if (res != operands[0])
19726 emit_move_insn (operands[0], res);
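/* E.g. for V4SImode (illustrative): "x < 0 ? -1 : 0" becomes an
   arithmetic shift right by 31 (psrad, smearing the sign bit) and
   "x < 0 ? 1 : 0" a logical shift right by 31 (psrld, leaving only
   the sign bit).  */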
19731 if (!nonimmediate_operand (cop1, mode))
19732 cop1 = force_reg (mode, cop1);
19733 if (!general_operand (operands[1], data_mode))
19734 operands[1] = force_reg (data_mode, operands[1]);
19735 if (!general_operand (operands[2], data_mode))
19736 operands[2] = force_reg (data_mode, operands[2]);
19738 /* XOP supports all of the comparisons on all 128-bit vector int types. */
19740 && (mode == V16QImode || mode == V8HImode
19741 || mode == V4SImode || mode == V2DImode))
19745 /* Canonicalize the comparison to EQ, GT, GTU. */
19756 code = reverse_condition (code);
19762 code = reverse_condition (code);
19768 code = swap_condition (code);
19769 x = cop0, cop0 = cop1, cop1 = x;
19773 gcc_unreachable ();
19776 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19777 if (mode == V2DImode)
19782 /* SSE4.1 supports EQ. */
19783 if (!TARGET_SSE4_1)
19789 /* SSE4.2 supports GT/GTU. */
19790 if (!TARGET_SSE4_2)
19795 gcc_unreachable ();
19799 /* Unsigned parallel compare is not supported by the hardware.
19800 Play some tricks to turn this into a signed comparison against 0. */
19804 cop0 = force_reg (mode, cop0);
19814 rtx (*gen_sub3) (rtx, rtx, rtx);
19818 case V8SImode: gen_sub3 = gen_subv8si3; break;
19819 case V4DImode: gen_sub3 = gen_subv4di3; break;
19820 case V4SImode: gen_sub3 = gen_subv4si3; break;
19821 case V2DImode: gen_sub3 = gen_subv2di3; break;
19823 gcc_unreachable ();
19825 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
19827 mask = ix86_build_signbit_mask (mode, true, false);
19828 t1 = gen_reg_rtx (mode);
19829 emit_insn (gen_sub3 (t1, cop0, mask));
19831 t2 = gen_reg_rtx (mode);
19832 emit_insn (gen_sub3 (t2, cop1, mask));
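/* The identity used here (illustrative): per element,
     x <u y  <=>  (x - SIGNBIT) <s (y - SIGNBIT)
   with subtraction modulo the element width, so biasing both operands
   by the sign-bit constant turns the unsigned comparison into the
   signed one the hardware provides.  */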
19844 /* Perform a parallel unsigned saturating subtraction. */
19845 x = gen_reg_rtx (mode);
19846 emit_insn (gen_rtx_SET (VOIDmode, x,
19847 gen_rtx_US_MINUS (mode, cop0, cop1)));
19850 cop1 = CONST0_RTX (mode);
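/* Illustrative identity: x >u y <=> (x -us y) != 0, where -us is
   unsigned saturating subtraction, so the GTU collapses into an EQ
   against zero whose result is negated.  */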
19856 gcc_unreachable ();
19861 /* Allow the comparison to be done in one mode, but the movcc to
19862 happen in another mode. */
19863 if (data_mode == mode)
19865 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19866 operands[1+negate], operands[2-negate]);
19870 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
19871 x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
19873 operands[1+negate], operands[2-negate]);
19874 x = gen_lowpart (data_mode, x);
19877 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19878 operands[2-negate]);
19882 /* Expand a variable vector permutation. */
19885 ix86_expand_vec_perm (rtx operands[])
19887 rtx target = operands[0];
19888 rtx op0 = operands[1];
19889 rtx op1 = operands[2];
19890 rtx mask = operands[3];
19891 rtx t1, t2, t3, t4, vt, vt2, vec[32];
19892 enum machine_mode mode = GET_MODE (op0);
19893 enum machine_mode maskmode = GET_MODE (mask);
19895 bool one_operand_shuffle = rtx_equal_p (op0, op1);
19897 /* Number of elements in the vector. */
19898 w = GET_MODE_NUNITS (mode);
19899 e = GET_MODE_UNIT_SIZE (mode);
19900 gcc_assert (w <= 32);
19904 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
19906 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
19907 a constant shuffle operand.  With a tiny bit of effort we can
19908 use VPERMD instead.  A re-interpretation stall for V4DFmode is
19909 unfortunate but there's no avoiding it.
19910 Similarly for V16HImode we don't have instructions for variable
19911 shuffling, while for V32QImode, after preparing suitable masks,
19912 we can use vpshufb; vpshufb; vpermq; vpor. */
19914 if (mode == V16HImode)
19916 maskmode = mode = V32QImode;
19922 maskmode = mode = V8SImode;
19926 t1 = gen_reg_rtx (maskmode);
19928 /* Replicate the low bits of the V4DImode mask = { A B C D } into V8SImode:
19930 t1 = { A A B B C C D D }. */
19931 for (i = 0; i < w / 2; ++i)
19932 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
19933 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19934 vt = force_reg (maskmode, vt);
19935 mask = gen_lowpart (maskmode, mask);
19936 if (maskmode == V8SImode)
19937 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
19939 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
19941 /* Multiply the shuffle indices by two. */
19942 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
19945 /* Add one to the odd shuffle indices:
19946 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
19947 for (i = 0; i < w / 2; ++i)
19949 vec[i * 2] = const0_rtx;
19950 vec[i * 2 + 1] = const1_rtx;
19952 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19953 vt = force_const_mem (maskmode, vt);
19954 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
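/* Net effect of the two steps above (illustrative): a V4DI shuffle
   index d stored in element i of MASK has become the V8SI index pair
   { 2*d, 2*d+1 } in elements 2*i and 2*i+1, selecting the two 32-bit
   halves of the requested 64-bit element.  */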
19957 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
19958 operands[3] = mask = t1;
19959 target = gen_lowpart (mode, target);
19960 op0 = gen_lowpart (mode, op0);
19961 op1 = gen_lowpart (mode, op1);
19967 /* The VPERMD and VPERMPS instructions already properly ignore
19968 the high bits of the shuffle elements. No need for us to
19969 perform an AND ourselves. */
19970 if (one_operand_shuffle)
19971 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
19974 t1 = gen_reg_rtx (V8SImode);
19975 t2 = gen_reg_rtx (V8SImode);
19976 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
19977 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
19983 mask = gen_lowpart (V8SFmode, mask);
19984 if (one_operand_shuffle)
19985 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
19988 t1 = gen_reg_rtx (V8SFmode);
19989 t2 = gen_reg_rtx (V8SFmode);
19990 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
19991 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
19997 /* By combining the two 128-bit input vectors into one 256-bit
19998 input vector, we can use VPERMD and VPERMPS for the full
19999 two-operand shuffle. */
20000 t1 = gen_reg_rtx (V8SImode);
20001 t2 = gen_reg_rtx (V8SImode);
20002 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
20003 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
20004 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
20005 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
20009 t1 = gen_reg_rtx (V8SFmode);
20010 t2 = gen_reg_rtx (V8SImode);
20011 mask = gen_lowpart (V4SImode, mask);
20012 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
20013 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
20014 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
20015 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
20019 t1 = gen_reg_rtx (V32QImode);
20020 t2 = gen_reg_rtx (V32QImode);
20021 t3 = gen_reg_rtx (V32QImode);
20022 vt2 = GEN_INT (128);
20023 for (i = 0; i < 32; i++)
20025 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
20026 vt = force_reg (V32QImode, vt);
20027 for (i = 0; i < 32; i++)
20028 vec[i] = i < 16 ? vt2 : const0_rtx;
20029 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
20030 vt2 = force_reg (V32QImode, vt2);
/* From mask create two adjusted masks, which contain the same
   bits as mask in the low 7 bits of each vector element.
   The first mask will have the most significant bit clear
   if it requests an element from the same 128-bit lane
   and MSB set if it requests an element from the other 128-bit lane.
   The second mask will have the opposite values of the MSB,
   and additionally will have its 128-bit lanes swapped.
   E.g. the mask vector { 07 12 1e 09 ... | 17 19 05 1f ... } will have
   t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
   t3 { 97 99 05 9f ... | 87 12 1e 89 ... }, where each ...
   stands for another 12 bytes.  */
/* The bit that tells whether an element is from the same lane or the
   other lane is bit 4, so shift it up by 3 to the MSB position.  */
20044 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
20045 gen_lowpart (V4DImode, mask),
20047 /* Clear MSB bits from the mask just in case it had them set. */
20048 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
/* After this, t1 will have the MSB set for elements from the other lane.  */
20050 emit_insn (gen_xorv32qi3 (t1, t1, vt2));
20051 /* Clear bits other than MSB. */
20052 emit_insn (gen_andv32qi3 (t1, t1, vt));
20053 /* Or in the lower bits from mask into t3. */
20054 emit_insn (gen_iorv32qi3 (t3, t1, t2));
/* And invert MSB bits in t1, so MSB is set for elements from the same
   128-bit lane.  */
20057 emit_insn (gen_xorv32qi3 (t1, t1, vt));
20058 /* Swap 128-bit lanes in t3. */
20059 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20060 gen_lowpart (V4DImode, t3),
20061 const2_rtx, GEN_INT (3),
20062 const0_rtx, const1_rtx));
20063 /* And or in the lower bits from mask into t1. */
20064 emit_insn (gen_iorv32qi3 (t1, t1, t2));
20065 if (one_operand_shuffle)
/* Each of these shuffles will put 0s in places where an
   element from the other 128-bit lane is needed; otherwise
   it will shuffle in the requested value.  */
20070 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
20071 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
20072 /* For t3 the 128-bit lanes are swapped again. */
20073 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20074 gen_lowpart (V4DImode, t3),
20075 const2_rtx, GEN_INT (3),
20076 const0_rtx, const1_rtx));
/* ORing both together produces the final result.  */
20078 emit_insn (gen_iorv32qi3 (target, t1, t3));
20082 t4 = gen_reg_rtx (V32QImode);
/* Similar to the one_operand_shuffle code above, just
   repeated twice for each operand.  The merge_two: code
   below will merge the two results together.  */
20086 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
20087 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
20088 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
20089 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
20090 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
20091 gen_lowpart (V4DImode, t4),
20092 const2_rtx, GEN_INT (3),
20093 const0_rtx, const1_rtx));
20094 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20095 gen_lowpart (V4DImode, t3),
20096 const2_rtx, GEN_INT (3),
20097 const0_rtx, const1_rtx));
20098 emit_insn (gen_iorv32qi3 (t4, t2, t4));
20099 emit_insn (gen_iorv32qi3 (t3, t1, t3));
20105 gcc_assert (GET_MODE_SIZE (mode) <= 16);
20112 /* The XOP VPPERM insn supports three inputs. By ignoring the
20113 one_operand_shuffle special case, we avoid creating another
20114 set of constant vectors in memory. */
20115 one_operand_shuffle = false;
20117 /* mask = mask & {2*w-1, ...} */
20118 vt = GEN_INT (2*w - 1);
20122 /* mask = mask & {w-1, ...} */
20123 vt = GEN_INT (w - 1);
20126 for (i = 0; i < w; i++)
20128 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20129 mask = expand_simple_binop (maskmode, AND, mask, vt,
20130 NULL_RTX, 0, OPTAB_DIRECT);
20132 /* For non-QImode operations, convert the word permutation control
20133 into a byte permutation control. */
20134 if (mode != V16QImode)
20136 mask = expand_simple_binop (maskmode, ASHIFT, mask,
20137 GEN_INT (exact_log2 (e)),
20138 NULL_RTX, 0, OPTAB_DIRECT);
20140 /* Convert mask to vector of chars. */
20141 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
20143 /* Replicate each of the input bytes into byte positions:
20144 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20145 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20146 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20147 for (i = 0; i < 16; ++i)
20148 vec[i] = GEN_INT (i/e * e);
20149 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20150 vt = force_const_mem (V16QImode, vt);
20152 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
20154 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
/* Convert it into the byte positions by doing
   mask = mask + {0,1,...,e-1, 0,1,...,e-1, ...}  */
20158 for (i = 0; i < 16; ++i)
20159 vec[i] = GEN_INT (i % e);
20160 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20161 vt = force_const_mem (V16QImode, vt);
20162 emit_insn (gen_addv16qi3 (mask, mask, vt));
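/* Worked example for V4SImode (e == 4): the word control { 2 0 3 1 }
   is shifted to { 8 0 12 4 }, replicated byte-wise to
   { 8 8 8 8  0 0 0 0  12 12 12 12  4 4 4 4 }, and finally becomes
   { 8 9 10 11  0 1 2 3  12 13 14 15  4 5 6 7 }, the equivalent
   byte-level pshufb control.  */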
20165 /* The actual shuffle operations all operate on V16QImode. */
20166 op0 = gen_lowpart (V16QImode, op0);
20167 op1 = gen_lowpart (V16QImode, op1);
20168 target = gen_lowpart (V16QImode, target);
20172 emit_insn (gen_xop_pperm (target, op0, op1, mask));
20174 else if (one_operand_shuffle)
20176 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
20183 /* Shuffle the two input vectors independently. */
20184 t1 = gen_reg_rtx (V16QImode);
20185 t2 = gen_reg_rtx (V16QImode);
20186 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
20187 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
20190 /* Then merge them together. The key is whether any given control
20191 element contained a bit set that indicates the second word. */
20192 mask = operands[3];
20194 if (maskmode == V2DImode && !TARGET_SSE4_1)
/* Without SSE4.1, we don't have V2DImode EQ.  Perform one
   more shuffle to convert the V2DI input mask into a V4SI
   input mask.  At that point the masking that expand_int_vcond
   performs will work as desired.  */
20200 rtx t3 = gen_reg_rtx (V4SImode);
20201 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
20202 const0_rtx, const0_rtx,
20203 const2_rtx, const2_rtx));
20205 maskmode = V4SImode;
20209 for (i = 0; i < w; i++)
20211 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20212 vt = force_reg (maskmode, vt);
20213 mask = expand_simple_binop (maskmode, AND, mask, vt,
20214 NULL_RTX, 0, OPTAB_DIRECT);
20216 xops[0] = gen_lowpart (mode, operands[0]);
20217 xops[1] = gen_lowpart (mode, t2);
20218 xops[2] = gen_lowpart (mode, t1);
20219 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
20222 ok = ix86_expand_int_vcond (xops);
20227 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20228 true if we should do zero extension, else sign extension. HIGH_P is
20229 true if we want the N/2 high elements, else the low elements. */
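/* In outline: with SSE4.1/AVX2 the packed sign/zero extension
   instructions are used directly, after extracting (32-byte modes) or
   shifting down (16-byte modes) the high half when HIGH_P; without
   SSE4.1 the input is instead interleaved with zeros, or with a sign
   mask obtained by comparing the input against zero.  */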
20232 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
20234 enum machine_mode imode = GET_MODE (operands[1]);
20239 rtx (*unpack)(rtx, rtx);
20240 rtx (*extract)(rtx, rtx) = NULL;
20241 enum machine_mode halfmode = BLKmode;
20247 unpack = gen_avx2_zero_extendv16qiv16hi2;
20249 unpack = gen_avx2_sign_extendv16qiv16hi2;
20250 halfmode = V16QImode;
20252 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
20256 unpack = gen_avx2_zero_extendv8hiv8si2;
20258 unpack = gen_avx2_sign_extendv8hiv8si2;
20259 halfmode = V8HImode;
20261 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
20265 unpack = gen_avx2_zero_extendv4siv4di2;
20267 unpack = gen_avx2_sign_extendv4siv4di2;
20268 halfmode = V4SImode;
20270 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
20274 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
20276 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
20280 unpack = gen_sse4_1_zero_extendv4hiv4si2;
20282 unpack = gen_sse4_1_sign_extendv4hiv4si2;
20286 unpack = gen_sse4_1_zero_extendv2siv2di2;
20288 unpack = gen_sse4_1_sign_extendv2siv2di2;
20291 gcc_unreachable ();
20294 if (GET_MODE_SIZE (imode) == 32)
20296 tmp = gen_reg_rtx (halfmode);
20297 emit_insn (extract (tmp, operands[1]));
/* Shift the higher 8 bytes into the lower 8 bytes.  */
20302 tmp = gen_reg_rtx (imode);
20303 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
20304 gen_lowpart (V1TImode, operands[1]),
20310 emit_insn (unpack (operands[0], tmp));
20314 rtx (*unpack)(rtx, rtx, rtx);
20320 unpack = gen_vec_interleave_highv16qi;
20322 unpack = gen_vec_interleave_lowv16qi;
20326 unpack = gen_vec_interleave_highv8hi;
20328 unpack = gen_vec_interleave_lowv8hi;
20332 unpack = gen_vec_interleave_highv4si;
20334 unpack = gen_vec_interleave_lowv4si;
20337 gcc_unreachable ();
20340 dest = gen_lowpart (imode, operands[0]);
20343 tmp = force_reg (imode, CONST0_RTX (imode));
20345 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
20346 operands[1], pc_rtx, pc_rtx);
20348 emit_insn (unpack (dest, operands[1], tmp));
/* Expand a conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by a conditional move can be
   done by generic code.  */
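/* For example, assuming an unsigned comparison that maps onto the
   carry flag, `if (a < b) x++;' becomes a compare followed by
   `adc $0, x', and `if (a < b) x--;' a compare followed by
   `sbb $0, x'; no setcc, branch or cmove is needed.  */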
20356 ix86_expand_int_addcc (rtx operands[])
20358 enum rtx_code code = GET_CODE (operands[1]);
20360 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
20362 rtx val = const0_rtx;
20363 bool fpcmp = false;
20364 enum machine_mode mode;
20365 rtx op0 = XEXP (operands[1], 0);
20366 rtx op1 = XEXP (operands[1], 1);
20368 if (operands[3] != const1_rtx
20369 && operands[3] != constm1_rtx)
20371 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20373 code = GET_CODE (compare_op);
20375 flags = XEXP (compare_op, 0);
20377 if (GET_MODE (flags) == CCFPmode
20378 || GET_MODE (flags) == CCFPUmode)
20381 code = ix86_fp_compare_code_to_integer (code);
20388 PUT_CODE (compare_op,
20389 reverse_condition_maybe_unordered
20390 (GET_CODE (compare_op)));
20392 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
20395 mode = GET_MODE (operands[0]);
20397 /* Construct either adc or sbb insn. */
20398 if ((code == LTU) == (operands[3] == constm1_rtx))
20403 insn = gen_subqi3_carry;
20406 insn = gen_subhi3_carry;
20409 insn = gen_subsi3_carry;
20412 insn = gen_subdi3_carry;
20415 gcc_unreachable ();
20423 insn = gen_addqi3_carry;
20426 insn = gen_addhi3_carry;
20429 insn = gen_addsi3_carry;
20432 insn = gen_adddi3_carry;
20435 gcc_unreachable ();
20438 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating-point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most four parts are generated.  */
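/* For instance, on 32-bit targets a DFmode value splits into two
   SImode parts, XFmode into three and TFmode into four, while on
   64-bit targets XFmode and TFmode split into a DImode low part plus
   one upper part.  */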
20450 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
20455 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
20457 size = (GET_MODE_SIZE (mode) + 4) / 8;
20459 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
20460 gcc_assert (size >= 2 && size <= 4);
/* Optimize constant pool references to immediates.  This is used by fp
   moves, which force all constants to memory to allow combining.  */
20464 if (MEM_P (operand) && MEM_READONLY_P (operand))
20466 rtx tmp = maybe_get_pool_constant (operand);
20471 if (MEM_P (operand) && !offsettable_memref_p (operand))
/* The only non-offsettable memories we handle are pushes.  */
20474 int ok = push_operand (operand, VOIDmode);
20478 operand = copy_rtx (operand);
20479 PUT_MODE (operand, Pmode);
20480 parts[0] = parts[1] = parts[2] = parts[3] = operand;
20484 if (GET_CODE (operand) == CONST_VECTOR)
20486 enum machine_mode imode = int_mode_for_mode (mode);
20487 /* Caution: if we looked through a constant pool memory above,
20488 the operand may actually have a different mode now. That's
20489 ok, since we want to pun this all the way back to an integer. */
20490 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
20491 gcc_assert (operand != NULL);
20497 if (mode == DImode)
20498 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
20503 if (REG_P (operand))
20505 gcc_assert (reload_completed);
20506 for (i = 0; i < size; i++)
20507 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
20509 else if (offsettable_memref_p (operand))
20511 operand = adjust_address (operand, SImode, 0);
20512 parts[0] = operand;
20513 for (i = 1; i < size; i++)
20514 parts[i] = adjust_address (operand, SImode, 4 * i);
20516 else if (GET_CODE (operand) == CONST_DOUBLE)
20521 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
20525 real_to_target (l, &r, mode);
20526 parts[3] = gen_int_mode (l[3], SImode);
20527 parts[2] = gen_int_mode (l[2], SImode);
20530 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
20531 parts[2] = gen_int_mode (l[2], SImode);
20534 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
20537 gcc_unreachable ();
20539 parts[1] = gen_int_mode (l[1], SImode);
20540 parts[0] = gen_int_mode (l[0], SImode);
20543 gcc_unreachable ();
20548 if (mode == TImode)
20549 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
20550 if (mode == XFmode || mode == TFmode)
20552 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
20553 if (REG_P (operand))
20555 gcc_assert (reload_completed);
20556 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
20557 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
20559 else if (offsettable_memref_p (operand))
20561 operand = adjust_address (operand, DImode, 0);
20562 parts[0] = operand;
20563 parts[1] = adjust_address (operand, upper_mode, 8);
20565 else if (GET_CODE (operand) == CONST_DOUBLE)
20570 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
20571 real_to_target (l, &r, mode);
/* Do not use a shift by 32, to avoid a warning on 32-bit systems.  */
20574 if (HOST_BITS_PER_WIDE_INT >= 64)
20577 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
20578 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
20581 parts[0] = immed_double_const (l[0], l[1], DImode);
20583 if (upper_mode == SImode)
20584 parts[1] = gen_int_mode (l[2], SImode);
20585 else if (HOST_BITS_PER_WIDE_INT >= 64)
20588 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
20589 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
20592 parts[1] = immed_double_const (l[2], l[3], DImode);
20595 gcc_unreachable ();
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */
20608 ix86_split_long_move (rtx operands[])
20613 int collisions = 0;
20614 enum machine_mode mode = GET_MODE (operands[0]);
20615 bool collisionparts[4];
/* The DFmode expanders may ask us to move a double.
   For a 64-bit target this is a single move.  By hiding the fact
   here we simplify i386.md splitters.  */
20620 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
/* Optimize constant pool references to immediates.  This is used by
   fp moves, which force all constants to memory to allow combining.  */
20625 if (MEM_P (operands[1])
20626 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
20627 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
20628 operands[1] = get_pool_constant (XEXP (operands[1], 0));
20629 if (push_operand (operands[0], VOIDmode))
20631 operands[0] = copy_rtx (operands[0]);
20632 PUT_MODE (operands[0], Pmode);
20635 operands[0] = gen_lowpart (DImode, operands[0]);
20636 operands[1] = gen_lowpart (DImode, operands[1]);
20637 emit_move_insn (operands[0], operands[1]);
/* The only non-offsettable memory we handle is a push.  */
20642 if (push_operand (operands[0], VOIDmode))
20645 gcc_assert (!MEM_P (operands[0])
20646 || offsettable_memref_p (operands[0]));
20648 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
20649 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
/* When emitting a push, take care of source operands on the stack.  */
20652 if (push && MEM_P (operands[1])
20653 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
20655 rtx src_base = XEXP (part[1][nparts - 1], 0);
20657 /* Compensate for the stack decrement by 4. */
20658 if (!TARGET_64BIT && nparts == 3
20659 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
20660 src_base = plus_constant (src_base, 4);
/* src_base refers to the stack pointer and is
   automatically decremented by each emitted push.  */
20664 for (i = 0; i < nparts; i++)
20665 part[1][i] = change_address (part[1][i],
20666 GET_MODE (part[1][i]), src_base);
/* We need to do the copy in the right order in case an address register
   of the source overlaps the destination.  */
20671 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
20675 for (i = 0; i < nparts; i++)
20678 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
20679 if (collisionparts[i])
/* A collision in the middle part can be handled by reordering.  */
20684 if (collisions == 1 && nparts == 3 && collisionparts [1])
20686 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20687 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20689 else if (collisions == 1
20691 && (collisionparts [1] || collisionparts [2]))
20693 if (collisionparts [1])
20695 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20696 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20700 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
20701 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
/* If there are more collisions, we can't handle them by reordering.
   Do an lea to the last part and use only one colliding move.  */
20707 else if (collisions > 1)
20713 base = part[0][nparts - 1];
/* Handle the case when the last part isn't valid for lea.
   This happens in 64-bit mode when storing the 12-byte XFmode.  */
20717 if (GET_MODE (base) != Pmode)
20718 base = gen_rtx_REG (Pmode, REGNO (base));
20720 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
20721 part[1][0] = replace_equiv_address (part[1][0], base);
20722 for (i = 1; i < nparts; i++)
20724 tmp = plus_constant (base, UNITS_PER_WORD * i);
20725 part[1][i] = replace_equiv_address (part[1][i], tmp);
20736 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
20737 emit_insn (gen_addsi3 (stack_pointer_rtx,
20738 stack_pointer_rtx, GEN_INT (-4)));
20739 emit_move_insn (part[0][2], part[1][2]);
20741 else if (nparts == 4)
20743 emit_move_insn (part[0][3], part[1][3]);
20744 emit_move_insn (part[0][2], part[1][2]);
/* In 64-bit mode we don't have a 32-bit push available.  In case this is
   a register, it is OK - we will just use the larger counterpart.  We also
   retype memory - these come from an attempt to avoid the REX prefix on
   moving the second half of a TFmode value.  */
20753 if (GET_MODE (part[1][1]) == SImode)
20755 switch (GET_CODE (part[1][1]))
20758 part[1][1] = adjust_address (part[1][1], DImode, 0);
20762 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
20766 gcc_unreachable ();
20769 if (GET_MODE (part[1][0]) == SImode)
20770 part[1][0] = part[1][1];
20773 emit_move_insn (part[0][1], part[1][1]);
20774 emit_move_insn (part[0][0], part[1][0]);
/* Choose the correct order so as not to overwrite the source before it is copied.  */
20779 if ((REG_P (part[0][0])
20780 && REG_P (part[1][1])
20781 && (REGNO (part[0][0]) == REGNO (part[1][1])
20783 && REGNO (part[0][0]) == REGNO (part[1][2]))
20785 && REGNO (part[0][0]) == REGNO (part[1][3]))))
20787 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
20789 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
20791 operands[2 + i] = part[0][j];
20792 operands[6 + i] = part[1][j];
20797 for (i = 0; i < nparts; i++)
20799 operands[2 + i] = part[0][i];
20800 operands[6 + i] = part[1][i];
20804 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
20805 if (optimize_insn_for_size_p ())
20807 for (j = 0; j < nparts - 1; j++)
20808 if (CONST_INT_P (operands[6 + j])
20809 && operands[6 + j] != const0_rtx
20810 && REG_P (operands[2 + j]))
20811 for (i = j; i < nparts - 1; i++)
20812 if (CONST_INT_P (operands[7 + i])
20813 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
20814 operands[7 + i] = operands[2 + j];
20817 for (i = 0; i < nparts; i++)
20818 emit_move_insn (operands[2 + i], operands[6 + i]);
20823 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
20824 left shift by a constant, either using a single shift or
20825 a sequence of add instructions. */
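/* For instance, when COUNT is 1, or when two adds are cheaper than a
   shift by a constant, a shift left by 2 is emitted as
   `add r, r; add r, r' rather than `shl $2, r'.  */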
20828 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
20830 rtx (*insn)(rtx, rtx, rtx);
20833 || (count * ix86_cost->add <= ix86_cost->shift_const
20834 && !optimize_insn_for_size_p ()))
20836 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
20837 while (count-- > 0)
20838 emit_insn (insn (operand, operand, operand));
20842 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20843 emit_insn (insn (operand, operand, GEN_INT (count)));
20848 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
20850 rtx (*gen_ashl3)(rtx, rtx, rtx);
20851 rtx (*gen_shld)(rtx, rtx, rtx);
20852 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20854 rtx low[2], high[2];
20857 if (CONST_INT_P (operands[2]))
20859 split_double_mode (mode, operands, 2, low, high);
20860 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
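/* Constant-count examples for DImode on ia32: a shift by 40 becomes
   high = low_src, low = 0, high <<= 8, while a shift by 12 becomes
   shld $12 into high followed by low <<= 12.  */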
20862 if (count >= half_width)
20864 emit_move_insn (high[0], low[1]);
20865 emit_move_insn (low[0], const0_rtx);
20867 if (count > half_width)
20868 ix86_expand_ashl_const (high[0], count - half_width, mode);
20872 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20874 if (!rtx_equal_p (operands[0], operands[1]))
20875 emit_move_insn (operands[0], operands[1]);
20877 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
20878 ix86_expand_ashl_const (low[0], count, mode);
20883 split_double_mode (mode, operands, 1, low, high);
20885 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20887 if (operands[1] == const1_rtx)
/* Assuming we've chosen QImode-capable registers, 1 << N
   can be done with two 32/64-bit shifts, no branches, no cmoves.  */
20891 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
20893 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
20895 ix86_expand_clear (low[0]);
20896 ix86_expand_clear (high[0]);
20897 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
20899 d = gen_lowpart (QImode, low[0]);
20900 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20901 s = gen_rtx_EQ (QImode, flags, const0_rtx);
20902 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20904 d = gen_lowpart (QImode, high[0]);
20905 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20906 s = gen_rtx_NE (QImode, flags, const0_rtx);
20907 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20910 /* Otherwise, we can get the same results by manually performing
20911 a bit extract operation on bit 5/6, and then performing the two
20912 shifts. The two methods of getting 0/1 into low/high are exactly
20913 the same size. Avoiding the shift in the bit extract case helps
20914 pentium4 a bit; no one else seems to care much either way. */
20917 enum machine_mode half_mode;
20918 rtx (*gen_lshr3)(rtx, rtx, rtx);
20919 rtx (*gen_and3)(rtx, rtx, rtx);
20920 rtx (*gen_xor3)(rtx, rtx, rtx);
20921 HOST_WIDE_INT bits;
20924 if (mode == DImode)
20926 half_mode = SImode;
20927 gen_lshr3 = gen_lshrsi3;
20928 gen_and3 = gen_andsi3;
20929 gen_xor3 = gen_xorsi3;
20934 half_mode = DImode;
20935 gen_lshr3 = gen_lshrdi3;
20936 gen_and3 = gen_anddi3;
20937 gen_xor3 = gen_xordi3;
20941 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
20942 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
20944 x = gen_lowpart (half_mode, operands[2]);
20945 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
20947 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
20948 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
20949 emit_move_insn (low[0], high[0]);
20950 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
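/* At this point high[0] holds the count bit selecting the upper half
   (0 or 1) and low[0] its complement; the two variable shifts below
   then move the single set bit into its final position, relying on the
   hardware using only the count modulo the half width.  */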
20953 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20954 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
20958 if (operands[1] == constm1_rtx)
20960 /* For -1 << N, we can avoid the shld instruction, because we
20961 know that we're shifting 0...31/63 ones into a -1. */
20962 emit_move_insn (low[0], constm1_rtx);
20963 if (optimize_insn_for_size_p ())
20964 emit_move_insn (high[0], low[0]);
20966 emit_move_insn (high[0], constm1_rtx);
20970 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20972 if (!rtx_equal_p (operands[0], operands[1]))
20973 emit_move_insn (operands[0], operands[1]);
20975 split_double_mode (mode, operands, 1, low, high);
20976 emit_insn (gen_shld (high[0], low[0], operands[2]));
20979 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20981 if (TARGET_CMOVE && scratch)
20983 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20984 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20986 ix86_expand_clear (scratch);
20987 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
20991 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20992 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20994 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
20999 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
21001 rtx (*gen_ashr3)(rtx, rtx, rtx)
21002 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
21003 rtx (*gen_shrd)(rtx, rtx, rtx);
21004 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21006 rtx low[2], high[2];
21009 if (CONST_INT_P (operands[2]))
21011 split_double_mode (mode, operands, 2, low, high);
21012 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
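/* Constant-count examples for DImode on ia32: x >> 63 becomes
   high = high_src >> 31, low = high (broadcasting the sign bit), and
   x >> 40 becomes low = high_src >> 8, high = high_src >> 31.  */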
21014 if (count == GET_MODE_BITSIZE (mode) - 1)
21016 emit_move_insn (high[0], high[1]);
21017 emit_insn (gen_ashr3 (high[0], high[0],
21018 GEN_INT (half_width - 1)));
21019 emit_move_insn (low[0], high[0]);
21022 else if (count >= half_width)
21024 emit_move_insn (low[0], high[1]);
21025 emit_move_insn (high[0], low[0]);
21026 emit_insn (gen_ashr3 (high[0], high[0],
21027 GEN_INT (half_width - 1)));
21029 if (count > half_width)
21030 emit_insn (gen_ashr3 (low[0], low[0],
21031 GEN_INT (count - half_width)));
21035 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21037 if (!rtx_equal_p (operands[0], operands[1]))
21038 emit_move_insn (operands[0], operands[1]);
21040 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
21041 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
21046 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21048 if (!rtx_equal_p (operands[0], operands[1]))
21049 emit_move_insn (operands[0], operands[1]);
21051 split_double_mode (mode, operands, 1, low, high);
21053 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21054 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
21056 if (TARGET_CMOVE && scratch)
21058 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21059 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21061 emit_move_insn (scratch, high[0]);
21062 emit_insn (gen_ashr3 (scratch, scratch,
21063 GEN_INT (half_width - 1)));
21064 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21069 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
21070 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
21072 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
21078 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
21080 rtx (*gen_lshr3)(rtx, rtx, rtx)
21081 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
21082 rtx (*gen_shrd)(rtx, rtx, rtx);
21083 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21085 rtx low[2], high[2];
21088 if (CONST_INT_P (operands[2]))
21090 split_double_mode (mode, operands, 2, low, high);
21091 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21093 if (count >= half_width)
21095 emit_move_insn (low[0], high[1]);
21096 ix86_expand_clear (high[0]);
21098 if (count > half_width)
21099 emit_insn (gen_lshr3 (low[0], low[0],
21100 GEN_INT (count - half_width)));
21104 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21106 if (!rtx_equal_p (operands[0], operands[1]))
21107 emit_move_insn (operands[0], operands[1]);
21109 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
21110 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
21115 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21117 if (!rtx_equal_p (operands[0], operands[1]))
21118 emit_move_insn (operands[0], operands[1]);
21120 split_double_mode (mode, operands, 1, low, high);
21122 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21123 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
21125 if (TARGET_CMOVE && scratch)
21127 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21128 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21130 ix86_expand_clear (scratch);
21131 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21136 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
21137 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
21139 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
/* Predict the just-emitted jump instruction to be taken with probability PROB.  */
21146 predict_jump (int prob)
21148 rtx insn = get_last_insn ();
21149 gcc_assert (JUMP_P (insn));
21150 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
/* Helper function for the string operations below.  Test VARIABLE
   whether it is aligned to VALUE bytes.  If true, jump to the label.  */
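/* E.g. ix86_expand_aligntest (destptr, 4, false) emits
   tmp = destptr & 4 followed by a branch to the returned label when
   tmp is zero, so the fall-through path runs only when that
   alignment bit is set.  */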
21156 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
21158 rtx label = gen_label_rtx ();
21159 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
21160 if (GET_MODE (variable) == DImode)
21161 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
21163 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
21164 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
21167 predict_jump (REG_BR_PROB_BASE * 50 / 100);
21169 predict_jump (REG_BR_PROB_BASE * 90 / 100);
/* Decrement COUNTREG by VALUE.  */
21175 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
21177 rtx (*gen_add)(rtx, rtx, rtx)
21178 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
21180 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
/* Zero extend the possibly-SImode EXP to a Pmode register.  */
21185 ix86_zero_extend_to_Pmode (rtx exp)
21188 if (GET_MODE (exp) == VOIDmode)
21189 return force_reg (Pmode, exp);
21190 if (GET_MODE (exp) == Pmode)
21191 return copy_to_mode_reg (Pmode, exp);
21192 r = gen_reg_rtx (Pmode);
21193 emit_insn (gen_zero_extendsidi2 (r, exp));
21197 /* Divide COUNTREG by SCALE. */
21199 scale_counter (rtx countreg, int scale)
21205 if (CONST_INT_P (countreg))
21206 return GEN_INT (INTVAL (countreg) / scale);
21207 gcc_assert (REG_P (countreg));
21209 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
21210 GEN_INT (exact_log2 (scale)),
21211 NULL, 1, OPTAB_DIRECT);
21215 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21216 DImode for constant loop counts. */
21218 static enum machine_mode
21219 counter_mode (rtx count_exp)
21221 if (GET_MODE (count_exp) != VOIDmode)
21222 return GET_MODE (count_exp);
21223 if (!CONST_INT_P (count_exp))
21225 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
/* When SRCPTR is non-NULL, output a simple loop to move memory from
   SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
   overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
   output the equivalent loop to set memory to VALUE (assumed to be
   in MODE).

   The size is rounded down to a whole number of the chunks moved at
   once.  SRCMEM and DESTMEM provide MEM rtxes to supply proper
   aliasing info.  */
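/* A sketch of the emitted control flow (illustrative, not literal):

     size = count & ~(piece_size - 1);  // piece_size = MODE size * UNROLL
     if (size == 0) goto out;           // guard emitted when piece_size == 1
     iter = 0;
   top:
     move (or set) UNROLL chunks of MODE at dest + iter (and src + iter);
     iter += piece_size;
     if (iter < size) goto top;
   out:
     destptr += iter;  srcptr += iter (when copying);  */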
21240 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
21241 rtx destptr, rtx srcptr, rtx value,
21242 rtx count, enum machine_mode mode, int unroll,
21245 rtx out_label, top_label, iter, tmp;
21246 enum machine_mode iter_mode = counter_mode (count);
21247 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
21248 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
21254 top_label = gen_label_rtx ();
21255 out_label = gen_label_rtx ();
21256 iter = gen_reg_rtx (iter_mode);
21258 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
21259 NULL, 1, OPTAB_DIRECT);
21260 /* Those two should combine. */
21261 if (piece_size == const1_rtx)
21263 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
21265 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21267 emit_move_insn (iter, const0_rtx);
21269 emit_label (top_label);
21271 tmp = convert_modes (Pmode, iter_mode, iter, true);
21272 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
21273 destmem = change_address (destmem, mode, x_addr);
21277 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
21278 srcmem = change_address (srcmem, mode, y_addr);
/* When unrolling for chips that reorder memory reads and writes,
   we can save registers by using a single temporary.
   Also, using 4 temporaries is overkill in 32-bit mode.  */
21283 if (!TARGET_64BIT && 0)
21285 for (i = 0; i < unroll; i++)
21290 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21292 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21294 emit_move_insn (destmem, srcmem);
21300 gcc_assert (unroll <= 4);
21301 for (i = 0; i < unroll; i++)
21303 tmpreg[i] = gen_reg_rtx (mode);
21307 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21309 emit_move_insn (tmpreg[i], srcmem);
21311 for (i = 0; i < unroll; i++)
21316 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21318 emit_move_insn (destmem, tmpreg[i]);
21323 for (i = 0; i < unroll; i++)
21327 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21328 emit_move_insn (destmem, value);
21331 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
21332 true, OPTAB_LIB_WIDEN);
21334 emit_move_insn (iter, tmp);
21336 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
21338 if (expected_size != -1)
21340 expected_size /= GET_MODE_SIZE (mode) * unroll;
21341 if (expected_size == 0)
21343 else if (expected_size > REG_BR_PROB_BASE)
21344 predict_jump (REG_BR_PROB_BASE - 1);
21346 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
21349 predict_jump (REG_BR_PROB_BASE * 80 / 100);
21350 iter = ix86_zero_extend_to_Pmode (iter);
21351 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
21352 true, OPTAB_LIB_WIDEN);
21353 if (tmp != destptr)
21354 emit_move_insn (destptr, tmp);
21357 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
21358 true, OPTAB_LIB_WIDEN);
21360 emit_move_insn (srcptr, tmp);
21362 emit_label (out_label);
/* Output a "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
21368 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
21369 rtx destptr, rtx srcptr,
21371 enum machine_mode mode)
21376 HOST_WIDE_INT rounded_count;
21378 /* If the size is known, it is shorter to use rep movs. */
21379 if (mode == QImode && CONST_INT_P (count)
21380 && !(INTVAL (count) & 3))
21383 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21384 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
21385 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
21386 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
21387 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
21388 if (mode != QImode)
21390 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21391 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21392 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21393 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
21394 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21395 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
21399 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
21400 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
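/* DESTEXP and SRCEXP describe the pointer values after the copy (base
   plus the number of bytes moved); the rep_mov pattern uses them to
   expose the update of the pointer registers to the optimizers.  */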
21402 if (CONST_INT_P (count))
21404 rounded_count = (INTVAL (count)
21405 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21406 destmem = shallow_copy_rtx (destmem);
21407 srcmem = shallow_copy_rtx (srcmem);
21408 set_mem_size (destmem, rounded_count);
21409 set_mem_size (srcmem, rounded_count);
21413 if (MEM_SIZE_KNOWN_P (destmem))
21414 clear_mem_size (destmem);
21415 if (MEM_SIZE_KNOWN_P (srcmem))
21416 clear_mem_size (srcmem);
21418 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
/* Output a "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
21425 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
21426 rtx count, enum machine_mode mode,
21431 HOST_WIDE_INT rounded_count;
21433 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21434 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
21435 value = force_reg (mode, gen_lowpart (mode, value));
21436 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
21437 if (mode != QImode)
21439 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21440 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21441 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21444 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
21445 if (orig_value == const0_rtx && CONST_INT_P (count))
21447 rounded_count = (INTVAL (count)
21448 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21449 destmem = shallow_copy_rtx (destmem);
21450 set_mem_size (destmem, rounded_count);
21452 else if (MEM_SIZE_KNOWN_P (destmem))
21453 clear_mem_size (destmem);
21454 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
21458 emit_strmov (rtx destmem, rtx srcmem,
21459 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
21461 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
21462 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
21463 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21466 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
21468 expand_movmem_epilogue (rtx destmem, rtx srcmem,
21469 rtx destptr, rtx srcptr, rtx count, int max_size)
21472 if (CONST_INT_P (count))
21474 HOST_WIDE_INT countval = INTVAL (count);
21477 if ((countval & 0x10) && max_size > 16)
21481 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
21482 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
21485 gcc_unreachable ();
21488 if ((countval & 0x08) && max_size > 8)
21491 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
21494 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
21495 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
21499 if ((countval & 0x04) && max_size > 4)
21501 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
21504 if ((countval & 0x02) && max_size > 2)
21506 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
21509 if ((countval & 0x01) && max_size > 1)
21511 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
21518 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
21519 count, 1, OPTAB_DIRECT);
21520 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
21521 count, QImode, 1, 4);
/* When single stringops are available, we can cheaply advance the dest
   and src pointers.  Otherwise we save code size by maintaining an
   offset (zero is readily available from the preceding rep operation)
   and using x86 addressing modes.  */
21529 if (TARGET_SINGLE_STRINGOP)
21533 rtx label = ix86_expand_aligntest (count, 4, true);
21534 src = change_address (srcmem, SImode, srcptr);
21535 dest = change_address (destmem, SImode, destptr);
21536 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21537 emit_label (label);
21538 LABEL_NUSES (label) = 1;
21542 rtx label = ix86_expand_aligntest (count, 2, true);
21543 src = change_address (srcmem, HImode, srcptr);
21544 dest = change_address (destmem, HImode, destptr);
21545 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21546 emit_label (label);
21547 LABEL_NUSES (label) = 1;
21551 rtx label = ix86_expand_aligntest (count, 1, true);
21552 src = change_address (srcmem, QImode, srcptr);
21553 dest = change_address (destmem, QImode, destptr);
21554 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21555 emit_label (label);
21556 LABEL_NUSES (label) = 1;
21561 rtx offset = force_reg (Pmode, const0_rtx);
21566 rtx label = ix86_expand_aligntest (count, 4, true);
21567 src = change_address (srcmem, SImode, srcptr);
21568 dest = change_address (destmem, SImode, destptr);
21569 emit_move_insn (dest, src);
21570 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
21571 true, OPTAB_LIB_WIDEN);
21573 emit_move_insn (offset, tmp);
21574 emit_label (label);
21575 LABEL_NUSES (label) = 1;
21579 rtx label = ix86_expand_aligntest (count, 2, true);
21580 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21581 src = change_address (srcmem, HImode, tmp);
21582 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21583 dest = change_address (destmem, HImode, tmp);
21584 emit_move_insn (dest, src);
21585 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
21586 true, OPTAB_LIB_WIDEN);
21588 emit_move_insn (offset, tmp);
21589 emit_label (label);
21590 LABEL_NUSES (label) = 1;
21594 rtx label = ix86_expand_aligntest (count, 1, true);
21595 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21596 src = change_address (srcmem, QImode, tmp);
21597 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21598 dest = change_address (destmem, QImode, tmp);
21599 emit_move_insn (dest, src);
21600 emit_label (label);
21601 LABEL_NUSES (label) = 1;
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
21608 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
21609 rtx count, int max_size)
21612 expand_simple_binop (counter_mode (count), AND, count,
21613 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
21614 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
21615 gen_lowpart (QImode, value), count, QImode,
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
21621 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
21625 if (CONST_INT_P (count))
21627 HOST_WIDE_INT countval = INTVAL (count);
21630 if ((countval & 0x10) && max_size > 16)
21634 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
21635 emit_insn (gen_strset (destptr, dest, value));
21636 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
21637 emit_insn (gen_strset (destptr, dest, value));
21640 gcc_unreachable ();
21643 if ((countval & 0x08) && max_size > 8)
21647 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
21648 emit_insn (gen_strset (destptr, dest, value));
21652 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
21653 emit_insn (gen_strset (destptr, dest, value));
21654 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
21655 emit_insn (gen_strset (destptr, dest, value));
21659 if ((countval & 0x04) && max_size > 4)
21661 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
21662 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
21665 if ((countval & 0x02) && max_size > 2)
21667 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
21668 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
21671 if ((countval & 0x01) && max_size > 1)
21673 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
21674 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
21681 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
21686 rtx label = ix86_expand_aligntest (count, 16, true);
21689 dest = change_address (destmem, DImode, destptr);
21690 emit_insn (gen_strset (destptr, dest, value));
21691 emit_insn (gen_strset (destptr, dest, value));
21695 dest = change_address (destmem, SImode, destptr);
21696 emit_insn (gen_strset (destptr, dest, value));
21697 emit_insn (gen_strset (destptr, dest, value));
21698 emit_insn (gen_strset (destptr, dest, value));
21699 emit_insn (gen_strset (destptr, dest, value));
21701 emit_label (label);
21702 LABEL_NUSES (label) = 1;
21706 rtx label = ix86_expand_aligntest (count, 8, true);
21709 dest = change_address (destmem, DImode, destptr);
21710 emit_insn (gen_strset (destptr, dest, value));
21714 dest = change_address (destmem, SImode, destptr);
21715 emit_insn (gen_strset (destptr, dest, value));
21716 emit_insn (gen_strset (destptr, dest, value));
21718 emit_label (label);
21719 LABEL_NUSES (label) = 1;
21723 rtx label = ix86_expand_aligntest (count, 4, true);
21724 dest = change_address (destmem, SImode, destptr);
21725 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
21726 emit_label (label);
21727 LABEL_NUSES (label) = 1;
21731 rtx label = ix86_expand_aligntest (count, 2, true);
21732 dest = change_address (destmem, HImode, destptr);
21733 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
21734 emit_label (label);
21735 LABEL_NUSES (label) = 1;
21739 rtx label = ix86_expand_aligntest (count, 1, true);
21740 dest = change_address (destmem, QImode, destptr);
21741 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
21742 emit_label (label);
21743 LABEL_NUSES (label) = 1;
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
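/* Each step below has the same shape: test one alignment bit of the
   destination and, when it is set, copy a QI/HI/SI chunk and decrement
   COUNT accordingly; the aligntest helper branches around the copy
   otherwise.  */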
21750 expand_movmem_prologue (rtx destmem, rtx srcmem,
21751 rtx destptr, rtx srcptr, rtx count,
21752 int align, int desired_alignment)
21754 if (align <= 1 && desired_alignment > 1)
21756 rtx label = ix86_expand_aligntest (destptr, 1, false);
21757 srcmem = change_address (srcmem, QImode, srcptr);
21758 destmem = change_address (destmem, QImode, destptr);
21759 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21760 ix86_adjust_counter (count, 1);
21761 emit_label (label);
21762 LABEL_NUSES (label) = 1;
21764 if (align <= 2 && desired_alignment > 2)
21766 rtx label = ix86_expand_aligntest (destptr, 2, false);
21767 srcmem = change_address (srcmem, HImode, srcptr);
21768 destmem = change_address (destmem, HImode, destptr);
21769 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21770 ix86_adjust_counter (count, 2);
21771 emit_label (label);
21772 LABEL_NUSES (label) = 1;
21774 if (align <= 4 && desired_alignment > 4)
21776 rtx label = ix86_expand_aligntest (destptr, 4, false);
21777 srcmem = change_address (srcmem, SImode, srcptr);
21778 destmem = change_address (destmem, SImode, destptr);
21779 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21780 ix86_adjust_counter (count, 4);
21781 emit_label (label);
21782 LABEL_NUSES (label) = 1;
21784 gcc_assert (desired_alignment <= 8);
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
21790 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
21791 int desired_align, int align_bytes)
21794 rtx orig_dst = dst;
21795 rtx orig_src = src;
21797 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
21798 if (src_align_bytes >= 0)
21799 src_align_bytes = desired_align - src_align_bytes;
21800 if (align_bytes & 1)
21802 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21803 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
21805 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21807 if (align_bytes & 2)
21809 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21810 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
21811 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21812 set_mem_align (dst, 2 * BITS_PER_UNIT);
21813 if (src_align_bytes >= 0
21814 && (src_align_bytes & 1) == (align_bytes & 1)
21815 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
21816 set_mem_align (src, 2 * BITS_PER_UNIT);
21818 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21820 if (align_bytes & 4)
21822 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21823 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
21824 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21825 set_mem_align (dst, 4 * BITS_PER_UNIT);
21826 if (src_align_bytes >= 0)
21828 unsigned int src_align = 0;
21829 if ((src_align_bytes & 3) == (align_bytes & 3))
21831 else if ((src_align_bytes & 1) == (align_bytes & 1))
21833 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21834 set_mem_align (src, src_align * BITS_PER_UNIT);
21837 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21839 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21840 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
21841 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21842 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21843 if (src_align_bytes >= 0)
21845 unsigned int src_align = 0;
21846 if ((src_align_bytes & 7) == (align_bytes & 7))
21848 else if ((src_align_bytes & 3) == (align_bytes & 3))
21850 else if ((src_align_bytes & 1) == (align_bytes & 1))
21852 if (src_align > (unsigned int) desired_align)
21853 src_align = desired_align;
21854 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21855 set_mem_align (src, src_align * BITS_PER_UNIT);
21857 if (MEM_SIZE_KNOWN_P (orig_dst))
21858 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21859 if (MEM_SIZE_KNOWN_P (orig_src))
21860 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
/* Set enough of DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
21868 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
21869 int align, int desired_alignment)
21871 if (align <= 1 && desired_alignment > 1)
21873 rtx label = ix86_expand_aligntest (destptr, 1, false);
21874 destmem = change_address (destmem, QImode, destptr);
21875 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
21876 ix86_adjust_counter (count, 1);
21877 emit_label (label);
21878 LABEL_NUSES (label) = 1;
21880 if (align <= 2 && desired_alignment > 2)
21882 rtx label = ix86_expand_aligntest (destptr, 2, false);
21883 destmem = change_address (destmem, HImode, destptr);
21884 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
21885 ix86_adjust_counter (count, 2);
21886 emit_label (label);
21887 LABEL_NUSES (label) = 1;
21889 if (align <= 4 && desired_alignment > 4)
21891 rtx label = ix86_expand_aligntest (destptr, 4, false);
21892 destmem = change_address (destmem, SImode, destptr);
21893 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
21894 ix86_adjust_counter (count, 4);
21895 emit_label (label);
21896 LABEL_NUSES (label) = 1;
21898 gcc_assert (desired_alignment <= 8);
/* Set enough of DST to align DST, known to be aligned by ALIGN, to
   DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
21904 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
21905 int desired_align, int align_bytes)
21908 rtx orig_dst = dst;
21909 if (align_bytes & 1)
21911 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21913 emit_insn (gen_strset (destreg, dst,
21914 gen_lowpart (QImode, value)));
21916 if (align_bytes & 2)
21918 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21919 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21920 set_mem_align (dst, 2 * BITS_PER_UNIT);
21922 emit_insn (gen_strset (destreg, dst,
21923 gen_lowpart (HImode, value)));
21925 if (align_bytes & 4)
21927 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21928 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21929 set_mem_align (dst, 4 * BITS_PER_UNIT);
21931 emit_insn (gen_strset (destreg, dst,
21932 gen_lowpart (SImode, value)));
21934 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21935 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21936 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21937 if (MEM_SIZE_KNOWN_P (orig_dst))
21938 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21942 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
21943 static enum stringop_alg
21944 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
21945 int *dynamic_check)
21947 const struct stringop_algs * algs;
21948 bool optimize_for_speed;
21949 /* Algorithms using the rep prefix want at least edi and ecx;
21950 additionally, memset wants eax and memcpy wants esi. Don't
21951 consider such algorithms if the user has appropriated those
21952 registers for their own purposes. */
21953 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
21955 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
21957 #define ALG_USABLE_P(alg) (rep_prefix_usable \
21958 || (alg != rep_prefix_1_byte \
21959 && alg != rep_prefix_4_byte \
21960 && alg != rep_prefix_8_byte))
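/* For example, compiling with -ffixed-ecx marks CX_REG as fixed, so
   rep_prefix_usable is false and ALG_USABLE_P rejects every
   rep_prefix_* algorithm, leaving only the loop and libcall variants
   as candidates.  */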
21961 const struct processor_costs *cost;
21963 /* Even if the string operation call is cold, we still might spend a lot
21964 of time processing large blocks. */
21965 if (optimize_function_for_size_p (cfun)
21966 || (optimize_insn_for_size_p ()
21967 && expected_size != -1 && expected_size < 256))
21968 optimize_for_speed = false;
21970 optimize_for_speed = true;
21972 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
21974 *dynamic_check = -1;
21976 algs = &cost->memset[TARGET_64BIT != 0];
21978 algs = &cost->memcpy[TARGET_64BIT != 0];
21979 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
21980 return ix86_stringop_alg;
21981 /* rep; movq or rep; movl is the smallest variant. */
21982 else if (!optimize_for_speed)
21984 if (!count || (count & 3))
21985 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
21987 return rep_prefix_usable ? rep_prefix_4_byte : loop;
/* Very tiny blocks are best handled via the loop; REP is expensive to set up.  */
21991 else if (expected_size != -1 && expected_size < 4)
21992 return loop_1_byte;
21993 else if (expected_size != -1)
21996 enum stringop_alg alg = libcall;
21997 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
21999 /* We get here if the algorithms that were not libcall-based
22000 were rep-prefix based and we are unable to use rep prefixes
22001 based on global register usage. Break out of the loop and
22002 use the heuristic below. */
22003 if (algs->size[i].max == 0)
22005 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
22007 enum stringop_alg candidate = algs->size[i].alg;
22009 if (candidate != libcall && ALG_USABLE_P (candidate))
/* Honor TARGET_INLINE_ALL_STRINGOPS by picking the
   last non-libcall inline algorithm.  */
22013 if (TARGET_INLINE_ALL_STRINGOPS)
/* When the current size is best copied by a libcall,
   but we are still forced to inline, run the heuristic below
   that will pick code for medium-sized blocks.  */
22018 if (alg != libcall)
22022 else if (ALG_USABLE_P (candidate))
22026 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
/* When asked to inline the call anyway, try to pick a meaningful choice.
   We look for the maximal size of a block that is faster to copy by hand,
   and take blocks of at most that size, guessing that the average size
   will be roughly half of the block.

   If this turns out to be bad, we might simply specify the preferred
   choice in ix86_costs.  */
22035 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22036 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
22039 enum stringop_alg alg;
22041 bool any_alg_usable_p = true;
22043 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
22045 enum stringop_alg candidate = algs->size[i].alg;
22046 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
22048 if (candidate != libcall && candidate
22049 && ALG_USABLE_P (candidate))
22050 max = algs->size[i].max;
22052 /* If there aren't any usable algorithms, then recursing on
22053 smaller sizes isn't going to find anything. Just return the
22054 simple byte-at-a-time copy loop. */
22055 if (!any_alg_usable_p)
22057 /* Pick something reasonable. */
22058 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22059 *dynamic_check = 128;
22060 return loop_1_byte;
22064 alg = decide_alg (count, max / 2, memset, dynamic_check);
22065 gcc_assert (*dynamic_check == -1);
22066 gcc_assert (alg != libcall);
22067 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22068 *dynamic_check = max;
22071 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
22072 #undef ALG_USABLE_P
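/* Illustrative sketch of the data that drives the selection above.  The
   table below is invented for the example (the real tables live in the
   processor_costs structures); decide_alg scans size[] for the first
   entry whose MAX covers EXPECTED_SIZE.  */
#if 0
static const struct stringop_algs example_memcpy_table = {
  libcall,                      /* unknown_size: fall back to a libcall.  */
  {{24, loop},                  /* blocks up to 24 bytes: simple loop.  */
   {128, rep_prefix_4_byte},    /* up to 128 bytes: rep movsl.  */
   {-1, rep_prefix_8_byte}}     /* everything larger: rep movsq.  */
};
/* With this table, expected_size == 100 would select rep_prefix_4_byte,
   while an unknown size would fall back to the library call.  */
#endif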
22075 /* Decide on alignment. We know that the operand is already aligned to ALIGN
22076 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
22078 decide_alignment (int align,
22079 enum stringop_alg alg,
22082 int desired_align = 0;
22086 gcc_unreachable ();
22088 case unrolled_loop:
22089 desired_align = GET_MODE_SIZE (Pmode);
22091 case rep_prefix_8_byte:
22094 case rep_prefix_4_byte:
22095 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
22096 copying a whole cache line at once. */
22097 if (TARGET_PENTIUMPRO)
22102 case rep_prefix_1_byte:
22103 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
22104 copying a whole cache line at once. */
22105 if (TARGET_PENTIUMPRO)
22119 if (desired_align < align)
22120 desired_align = align;
22121 if (expected_size != -1 && expected_size < 4)
22122 desired_align = align;
22123 return desired_align;
22126 /* Return the smallest power of 2 greater than VAL. */
22128 smallest_pow2_greater_than (int val)
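/* A minimal sketch of the computation, equivalent to
   smallest_pow2_greater_than (illustrative only, not compiled):  */
#if 0
static int
smallest_pow2_greater_than_sketch (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;          /* e.g. 5 -> 8, 8 -> 16.  */
}
#endif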
22136 /* Expand string move (memcpy) operation. Use i386 string operations
22137 when profitable. expand_setmem contains similar code. The code
22138 depends upon architecture, block size and alignment, but always has
22139 the same overall structure:
22141 1) Prologue guard: Conditional that jumps up to epilogues for small
22142 blocks that can be handled by the epilogue alone. This is faster
22143 but also needed for correctness, since the prologue assumes the block
22144 is larger than the desired alignment.
22146 Optional dynamic check for size and libcall for large
22147 blocks is emitted here too, with -minline-stringops-dynamically.
22149 2) Prologue: copy first few bytes in order to get destination
22150 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22151 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22152 copied. We emit either a jump tree on power of two sized
22153 blocks, or a byte loop.
22155 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22156 with specified algorithm.
22158 4) Epilogue: code copying tail of the block that is too small to be
22159 handled by main body (or up to size guarded by prologue guard). */
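/* A rough picture of the emitted control flow (pseudo-C, illustrative
   only; the expander below produces RTL, and the exact guards vary
   with the chosen algorithm):

     if (count < epilogue_size_needed)                   // 1) guard
       goto epilogue;
     copy first DESIRED_ALIGN - ALIGN bytes;             // 2) prologue
     copy SIZE_NEEDED bytes at a time;                   // 3) main body
   epilogue:
     copy count & (epilogue_size_needed - 1) bytes;      // 4) tail  */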
22162 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
22163 rtx expected_align_exp, rtx expected_size_exp)
22169 rtx jump_around_label = NULL;
22170 HOST_WIDE_INT align = 1;
22171 unsigned HOST_WIDE_INT count = 0;
22172 HOST_WIDE_INT expected_size = -1;
22173 int size_needed = 0, epilogue_size_needed;
22174 int desired_align = 0, align_bytes = 0;
22175 enum stringop_alg alg;
22177 bool need_zero_guard = false;
22179 if (CONST_INT_P (align_exp))
22180 align = INTVAL (align_exp);
22181 /* i386 can do misaligned access at a reasonably increased cost. */
22182 if (CONST_INT_P (expected_align_exp)
22183 && INTVAL (expected_align_exp) > align)
22184 align = INTVAL (expected_align_exp);
22185 /* ALIGN is the minimum of destination and source alignment, but we care here
22186 just about destination alignment. */
22187 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
22188 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
22190 if (CONST_INT_P (count_exp))
22191 count = expected_size = INTVAL (count_exp);
22192 if (CONST_INT_P (expected_size_exp) && count == 0)
22193 expected_size = INTVAL (expected_size_exp);
22195 /* Make sure we don't need to care about overflow later on. */
22196 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
22199 /* Step 0: Decide on preferred algorithm, desired alignment and
22200 size of chunks to be copied by main loop. */
22202 alg = decide_alg (count, expected_size, false, &dynamic_check);
22203 desired_align = decide_alignment (align, alg, expected_size);
22205 if (!TARGET_ALIGN_STRINGOPS)
22206 align = desired_align;
22208 if (alg == libcall)
22210 gcc_assert (alg != no_stringop);
22212 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
22213 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
22214 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
22219 gcc_unreachable ();
22221 need_zero_guard = true;
22222 size_needed = GET_MODE_SIZE (Pmode);
22224 case unrolled_loop:
22225 need_zero_guard = true;
22226 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
22228 case rep_prefix_8_byte:
22231 case rep_prefix_4_byte:
22234 case rep_prefix_1_byte:
22238 need_zero_guard = true;
22243 epilogue_size_needed = size_needed;
22245 /* Step 1: Prologue guard. */
22247 /* Alignment code needs count to be in register. */
22248 if (CONST_INT_P (count_exp) && desired_align > align)
22250 if (INTVAL (count_exp) > desired_align
22251 && INTVAL (count_exp) > size_needed)
22254 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22255 if (align_bytes <= 0)
22258 align_bytes = desired_align - align_bytes;
22260 if (align_bytes == 0)
22261 count_exp = force_reg (counter_mode (count_exp), count_exp);
22263 gcc_assert (desired_align >= 1 && align >= 1);
22265 /* Ensure that alignment prologue won't copy past end of block. */
22266 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22268 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
22269 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
22270 Make sure it is a power of 2. */
22271 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
22275 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
22277 /* If main algorithm works on QImode, no epilogue is needed.
22278 For small sizes just don't align anything. */
22279 if (size_needed == 1)
22280 desired_align = align;
22287 label = gen_label_rtx ();
22288 emit_cmp_and_jump_insns (count_exp,
22289 GEN_INT (epilogue_size_needed),
22290 LTU, 0, counter_mode (count_exp), 1, label);
22291 if (expected_size == -1 || expected_size < epilogue_size_needed)
22292 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22294 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22298 /* Emit code to decide at runtime whether a library call or inline code should be
22300 if (dynamic_check != -1)
22302 if (CONST_INT_P (count_exp))
22304 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
22306 emit_block_move_via_libcall (dst, src, count_exp, false);
22307 count_exp = const0_rtx;
22313 rtx hot_label = gen_label_rtx ();
22314 jump_around_label = gen_label_rtx ();
22315 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22316 LEU, 0, GET_MODE (count_exp), 1, hot_label);
22317 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22318 emit_block_move_via_libcall (dst, src, count_exp, false);
22319 emit_jump (jump_around_label);
22320 emit_label (hot_label);
22324 /* Step 2: Alignment prologue. */
22326 if (desired_align > align)
22328 if (align_bytes == 0)
22330 /* Except for the first move in the epilogue, we no longer know
22331 the constant offset in the aliasing info. It doesn't seem worth
22332 the pain to maintain it for the first move, so throw away
22334 src = change_address (src, BLKmode, srcreg);
22335 dst = change_address (dst, BLKmode, destreg);
22336 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
22341 /* If we know how many bytes need to be stored before dst is
22342 sufficiently aligned, maintain aliasing info accurately. */
22343 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
22344 desired_align, align_bytes);
22345 count_exp = plus_constant (count_exp, -align_bytes);
22346 count -= align_bytes;
22348 if (need_zero_guard
22349 && (count < (unsigned HOST_WIDE_INT) size_needed
22350 || (align_bytes == 0
22351 && count < ((unsigned HOST_WIDE_INT) size_needed
22352 + desired_align - align))))
22354 /* It is possible that we copied enough so the main loop will not
22356 gcc_assert (size_needed > 1);
22357 if (label == NULL_RTX)
22358 label = gen_label_rtx ();
22359 emit_cmp_and_jump_insns (count_exp,
22360 GEN_INT (size_needed),
22361 LTU, 0, counter_mode (count_exp), 1, label);
22362 if (expected_size == -1
22363 || expected_size < (desired_align - align) / 2 + size_needed)
22364 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22366 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22369 if (label && size_needed == 1)
22371 emit_label (label);
22372 LABEL_NUSES (label) = 1;
22374 epilogue_size_needed = 1;
22376 else if (label == NULL_RTX)
22377 epilogue_size_needed = size_needed;
22379 /* Step 3: Main loop. */
22385 gcc_unreachable ();
22387 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22388 count_exp, QImode, 1, expected_size);
22391 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22392 count_exp, Pmode, 1, expected_size);
22394 case unrolled_loop:
22395 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have
22396 enough registers for 4 temporaries anyway. */
22397 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22398 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
22401 case rep_prefix_8_byte:
22402 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22405 case rep_prefix_4_byte:
22406 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22409 case rep_prefix_1_byte:
22410 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22414 /* Properly adjust the offset of the src and dest memory for aliasing. */
22415 if (CONST_INT_P (count_exp))
22417 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
22418 (count / size_needed) * size_needed);
22419 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
22420 (count / size_needed) * size_needed);
22424 src = change_address (src, BLKmode, srcreg);
22425 dst = change_address (dst, BLKmode, destreg);
22428 /* Step 4: Epilogue to copy the remaining bytes. */
22432 /* When the main loop is done, COUNT_EXP might hold the original count,
22433 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
22434 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
22435 bytes. Compensate if needed. */
22437 if (size_needed < epilogue_size_needed)
22440 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22441 GEN_INT (size_needed - 1), count_exp, 1,
22443 if (tmp != count_exp)
22444 emit_move_insn (count_exp, tmp);
22446 emit_label (label);
22447 LABEL_NUSES (label) = 1;
22450 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22451 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
22452 epilogue_size_needed);
22453 if (jump_around_label)
22454 emit_label (jump_around_label);
22458 /* Helper function for memset. For QImode value 0xXY produce
22459 0xXYXYXYXY of the width specified by MODE. This is essentially
22460 a * 0x01010101, but we can do slightly better than
22461 synth_mult by unwinding the sequence by hand on CPUs with
22464 promote_duplicated_reg (enum machine_mode mode, rtx val)
22466 enum machine_mode valmode = GET_MODE (val);
22468 int nops = mode == DImode ? 3 : 2;
22470 gcc_assert (mode == SImode || mode == DImode);
22471 if (val == const0_rtx)
22472 return copy_to_mode_reg (mode, const0_rtx);
22473 if (CONST_INT_P (val))
22475 HOST_WIDE_INT v = INTVAL (val) & 255;
22479 if (mode == DImode)
22480 v |= (v << 16) << 16;
22481 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
22484 if (valmode == VOIDmode)
22486 if (valmode != QImode)
22487 val = gen_lowpart (QImode, val);
22488 if (mode == QImode)
22490 if (!TARGET_PARTIAL_REG_STALL)
22492 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
22493 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
22494 <= (ix86_cost->shift_const + ix86_cost->add) * nops
22495 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
22497 rtx reg = convert_modes (mode, QImode, val, true);
22498 tmp = promote_duplicated_reg (mode, const1_rtx);
22499 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
22504 rtx reg = convert_modes (mode, QImode, val, true);
22506 if (!TARGET_PARTIAL_REG_STALL)
22507 if (mode == SImode)
22508 emit_insn (gen_movsi_insv_1 (reg, reg));
22510 emit_insn (gen_movdi_insv_1 (reg, reg));
22513 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
22514 NULL, 1, OPTAB_DIRECT);
22516 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
22518 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
22519 NULL, 1, OPTAB_DIRECT);
22520 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
22521 if (mode == DImode)
22523 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
22524 NULL, 1, OPTAB_DIRECT);
22525 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
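/* Worked example of the shift/or sequence above for val = 0xAB in
   SImode:
     reg              = 0x000000AB
     reg |= reg << 8  -> 0x0000ABAB
     reg |= reg << 16 -> 0xABABABAB
   DImode performs one more step, reg |= reg << 32.  */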
22530 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size
22531 that will be needed by the main loop copying SIZE_NEEDED chunks and by the
22532 prologue getting the alignment from ALIGN to DESIRED_ALIGN. */
22534 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
22539 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
22540 promoted_val = promote_duplicated_reg (DImode, val);
22541 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
22542 promoted_val = promote_duplicated_reg (SImode, val);
22543 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
22544 promoted_val = promote_duplicated_reg (HImode, val);
22546 promoted_val = val;
22548 return promoted_val;
22551 /* Expand string set operation (memset). Use i386 string operations when
22552 profitable. See the expand_movmem comment for an explanation of the
22553 individual steps performed. */
22555 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
22556 rtx expected_align_exp, rtx expected_size_exp)
22561 rtx jump_around_label = NULL;
22562 HOST_WIDE_INT align = 1;
22563 unsigned HOST_WIDE_INT count = 0;
22564 HOST_WIDE_INT expected_size = -1;
22565 int size_needed = 0, epilogue_size_needed;
22566 int desired_align = 0, align_bytes = 0;
22567 enum stringop_alg alg;
22568 rtx promoted_val = NULL;
22569 bool force_loopy_epilogue = false;
22571 bool need_zero_guard = false;
22573 if (CONST_INT_P (align_exp))
22574 align = INTVAL (align_exp);
22575 /* i386 can do misaligned access at a reasonably increased cost. */
22576 if (CONST_INT_P (expected_align_exp)
22577 && INTVAL (expected_align_exp) > align)
22578 align = INTVAL (expected_align_exp);
22579 if (CONST_INT_P (count_exp))
22580 count = expected_size = INTVAL (count_exp);
22581 if (CONST_INT_P (expected_size_exp) && count == 0)
22582 expected_size = INTVAL (expected_size_exp);
22584 /* Make sure we don't need to care about overflow later on. */
22585 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
22588 /* Step 0: Decide on preferred algorithm, desired alignment and
22589 size of chunks to be copied by main loop. */
22591 alg = decide_alg (count, expected_size, true, &dynamic_check);
22592 desired_align = decide_alignment (align, alg, expected_size);
22594 if (!TARGET_ALIGN_STRINGOPS)
22595 align = desired_align;
22597 if (alg == libcall)
22599 gcc_assert (alg != no_stringop);
22601 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
22602 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
22607 gcc_unreachable ();
22609 need_zero_guard = true;
22610 size_needed = GET_MODE_SIZE (Pmode);
22612 case unrolled_loop:
22613 need_zero_guard = true;
22614 size_needed = GET_MODE_SIZE (Pmode) * 4;
22616 case rep_prefix_8_byte:
22619 case rep_prefix_4_byte:
22622 case rep_prefix_1_byte:
22626 need_zero_guard = true;
22630 epilogue_size_needed = size_needed;
22632 /* Step 1: Prologue guard. */
22634 /* Alignment code needs count to be in register. */
22635 if (CONST_INT_P (count_exp) && desired_align > align)
22637 if (INTVAL (count_exp) > desired_align
22638 && INTVAL (count_exp) > size_needed)
22641 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22642 if (align_bytes <= 0)
22645 align_bytes = desired_align - align_bytes;
22647 if (align_bytes == 0)
22649 enum machine_mode mode = SImode;
22650 if (TARGET_64BIT && (count & ~0xffffffff))
22652 count_exp = force_reg (mode, count_exp);
22655 /* Do the cheap promotion to allow better CSE across the
22656 main loop and epilogue (i.e. one load of the big constant in
22657 front of all the code). */
22658 if (CONST_INT_P (val_exp))
22659 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
22660 desired_align, align);
22661 /* Ensure that alignment prologue won't copy past end of block. */
22662 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22664 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
22665 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
22666 Make sure it is a power of 2. */
22667 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
22669 /* To improve performance of small blocks, we jump around the VAL
22670 promoting code. This means that if the promoted VAL is not constant,
22671 we might not use it in the epilogue and have to use the byte
22673 if (epilogue_size_needed > 2 && !promoted_val)
22674 force_loopy_epilogue = true;
22677 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
22679 /* If main algorithm works on QImode, no epilogue is needed.
22680 For small sizes just don't align anything. */
22681 if (size_needed == 1)
22682 desired_align = align;
22689 label = gen_label_rtx ();
22690 emit_cmp_and_jump_insns (count_exp,
22691 GEN_INT (epilogue_size_needed),
22692 LTU, 0, counter_mode (count_exp), 1, label);
22693 if (expected_size == -1 || expected_size <= epilogue_size_needed)
22694 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22696 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22699 if (dynamic_check != -1)
22701 rtx hot_label = gen_label_rtx ();
22702 jump_around_label = gen_label_rtx ();
22703 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22704 LEU, 0, counter_mode (count_exp), 1, hot_label);
22705 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22706 set_storage_via_libcall (dst, count_exp, val_exp, false);
22707 emit_jump (jump_around_label);
22708 emit_label (hot_label);
22711 /* Step 2: Alignment prologue. */
22713 /* Do the expensive promotion once we have branched off the small blocks. */
22715 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
22716 desired_align, align);
22717 gcc_assert (desired_align >= 1 && align >= 1);
22719 if (desired_align > align)
22721 if (align_bytes == 0)
22723 /* Except for the first move in the epilogue, we no longer know
22724 the constant offset in the aliasing info. It doesn't seem worth
22725 the pain to maintain it for the first move, so throw away
22727 dst = change_address (dst, BLKmode, destreg);
22728 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
22733 /* If we know how many bytes need to be stored before dst is
22734 sufficiently aligned, maintain aliasing info accurately. */
22735 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
22736 desired_align, align_bytes);
22737 count_exp = plus_constant (count_exp, -align_bytes);
22738 count -= align_bytes;
22740 if (need_zero_guard
22741 && (count < (unsigned HOST_WIDE_INT) size_needed
22742 || (align_bytes == 0
22743 && count < ((unsigned HOST_WIDE_INT) size_needed
22744 + desired_align - align))))
22746 /* It is possible that we copied enough so the main loop will not
22748 gcc_assert (size_needed > 1);
22749 if (label == NULL_RTX)
22750 label = gen_label_rtx ();
22751 emit_cmp_and_jump_insns (count_exp,
22752 GEN_INT (size_needed),
22753 LTU, 0, counter_mode (count_exp), 1, label);
22754 if (expected_size == -1
22755 || expected_size < (desired_align - align) / 2 + size_needed)
22756 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22758 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22761 if (label && size_needed == 1)
22763 emit_label (label);
22764 LABEL_NUSES (label) = 1;
22766 promoted_val = val_exp;
22767 epilogue_size_needed = 1;
22769 else if (label == NULL_RTX)
22770 epilogue_size_needed = size_needed;
22772 /* Step 3: Main loop. */
22778 gcc_unreachable ();
22780 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22781 count_exp, QImode, 1, expected_size);
22784 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22785 count_exp, Pmode, 1, expected_size);
22787 case unrolled_loop:
22788 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22789 count_exp, Pmode, 4, expected_size);
22791 case rep_prefix_8_byte:
22792 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22795 case rep_prefix_4_byte:
22796 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22799 case rep_prefix_1_byte:
22800 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22804 /* Properly adjust the offset of the src and dest memory for aliasing. */
22805 if (CONST_INT_P (count_exp))
22806 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
22807 (count / size_needed) * size_needed);
22809 dst = change_address (dst, BLKmode, destreg);
22811 /* Step 4: Epilogue to copy the remaining bytes. */
22815 /* When the main loop is done, COUNT_EXP might hold the original count,
22816 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
22817 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
22818 bytes. Compensate if needed. */
22820 if (size_needed < epilogue_size_needed)
22823 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22824 GEN_INT (size_needed - 1), count_exp, 1,
22826 if (tmp != count_exp)
22827 emit_move_insn (count_exp, tmp);
22829 emit_label (label);
22830 LABEL_NUSES (label) = 1;
22833 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22835 if (force_loopy_epilogue)
22836 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
22837 epilogue_size_needed);
22839 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
22840 epilogue_size_needed);
22842 if (jump_around_label)
22843 emit_label (jump_around_label);
22847 /* Expand the appropriate insns for doing strlen if not just doing
22850 out = result, initialized with the start address
22851 align_rtx = alignment of the address.
22852 scratch = scratch register, initialized with the start address when
22853 not aligned, otherwise undefined
22855 This is just the body. It needs the initializations mentioned above and
22856 some address computing at the end. These things are done in i386.md. */
22859 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
22863 rtx align_2_label = NULL_RTX;
22864 rtx align_3_label = NULL_RTX;
22865 rtx align_4_label = gen_label_rtx ();
22866 rtx end_0_label = gen_label_rtx ();
22868 rtx tmpreg = gen_reg_rtx (SImode);
22869 rtx scratch = gen_reg_rtx (SImode);
22873 if (CONST_INT_P (align_rtx))
22874 align = INTVAL (align_rtx);
22876 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
22878 /* Is there a known alignment and is it less than 4? */
22881 rtx scratch1 = gen_reg_rtx (Pmode);
22882 emit_move_insn (scratch1, out);
22883 /* Is there a known alignment and is it not 2? */
22886 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
22887 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
22889 /* Leave just the 3 lower bits. */
22890 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
22891 NULL_RTX, 0, OPTAB_WIDEN);
22893 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22894 Pmode, 1, align_4_label);
22895 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
22896 Pmode, 1, align_2_label);
22897 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
22898 Pmode, 1, align_3_label);
22902 /* Since the alignment is 2, we have to check 2 or 0 bytes;
22903 check whether it is aligned to a 4-byte boundary. */
22905 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
22906 NULL_RTX, 0, OPTAB_WIDEN);
22908 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22909 Pmode, 1, align_4_label);
22912 mem = change_address (src, QImode, out);
22914 /* Now compare the bytes. */
22916 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
22917 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
22918 QImode, 1, end_0_label);
22920 /* Increment the address. */
22921 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22923 /* Not needed with an alignment of 2 */
22926 emit_label (align_2_label);
22928 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22931 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22933 emit_label (align_3_label);
22936 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22939 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22942 /* Generate a loop to check 4 bytes at a time. It is not a good idea
22943 to align this loop; it only gives huge programs, but does not help to
22945 emit_label (align_4_label);
22947 mem = change_address (src, SImode, out);
22948 emit_move_insn (scratch, mem);
22949 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
22951 /* This formula yields a nonzero result iff one of the bytes is zero.
22952 This saves three branches inside the loop and many cycles. */
22954 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
22955 emit_insn (gen_one_cmplsi2 (scratch, scratch));
22956 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
22957 emit_insn (gen_andsi3 (tmpreg, tmpreg,
22958 gen_int_mode (0x80808080, SImode)));
22959 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
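/* A self-contained demonstration of the formula above (illustrative
   only; has_zero_byte is not part of GCC):  */
#if 0
#include <stdint.h>

/* Nonzero iff some byte of X is zero.  Below the lowest zero byte no
   borrow can start, so the least significant set bit of the result
   marks that byte; higher bits may be spurious, which is why the
   consumers above always locate the lowest set byte first.  */
static int
has_zero_byte (uint32_t x)
{
  return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
}
/* has_zero_byte (0x64636261) == 0   ("abcd"),
   has_zero_byte (0x00636261) != 0   ("abc" plus NUL).  */
#endif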
22964 rtx reg = gen_reg_rtx (SImode);
22965 rtx reg2 = gen_reg_rtx (Pmode);
22966 emit_move_insn (reg, tmpreg);
22967 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
22969 /* If zero is not in the first two bytes, move two bytes forward. */
22970 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22971 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22972 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22973 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
22974 gen_rtx_IF_THEN_ELSE (SImode, tmp,
22977 /* Emit the lea manually to avoid clobbering the flags. */
22978 emit_insn (gen_rtx_SET (SImode, reg2,
22979 gen_rtx_PLUS (Pmode, out, const2_rtx)));
22981 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22982 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22983 emit_insn (gen_rtx_SET (VOIDmode, out,
22984 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
22990 rtx end_2_label = gen_label_rtx ();
22991 /* Is zero in the first two bytes? */
22993 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22994 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22995 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
22996 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22997 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
22999 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
23000 JUMP_LABEL (tmp) = end_2_label;
23002 /* Not in the first two. Move two bytes forward. */
23003 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
23004 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
23006 emit_label (end_2_label);
23010 /* Avoid branch in fixing the byte. */
23011 tmpreg = gen_lowpart (QImode, tmpreg);
23012 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
23013 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
23014 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
23015 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
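/* Note on the branchless fix-up above: OUT was advanced 4 bytes past
   the loaded word, and after the 16-bit adjustment OUT - 4 points at
   the two bytes containing the zero.  Doubling the low byte of TMPREG
   moves its bit 7 (set iff the zero is the first of those two bytes)
   into the carry flag, so the subtract-with-borrow yields OUT - 4 in
   that case and OUT - 3 when the zero is the second byte.  */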
23017 emit_label (end_0_label);
23020 /* Expand strlen. */
23023 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
23025 rtx addr, scratch1, scratch2, scratch3, scratch4;
23027 /* The generic case of the strlen expander is long. Avoid its
23028 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
23030 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
23031 && !TARGET_INLINE_ALL_STRINGOPS
23032 && !optimize_insn_for_size_p ()
23033 && (!CONST_INT_P (align) || INTVAL (align) < 4))
23036 addr = force_reg (Pmode, XEXP (src, 0));
23037 scratch1 = gen_reg_rtx (Pmode);
23039 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
23040 && !optimize_insn_for_size_p ())
23042 /* Well, it seems that some optimizer does not combine a call like
23043 foo(strlen(bar), strlen(bar));
23044 when the move and the subtraction are done here. It does calculate
23045 the length just once when these instructions are done inside of
23046 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
23047 often used, and I use one fewer register for the lifetime of
23048 output_strlen_unroll(), this is better. */
23050 emit_move_insn (out, addr);
23052 ix86_expand_strlensi_unroll_1 (out, src, align);
23054 /* strlensi_unroll_1 returns the address of the zero at the end of
23055 the string, like memchr(), so compute the length by subtracting
23056 the start address. */
23057 emit_insn (ix86_gen_sub3 (out, out, addr));
23063 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23064 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
23067 scratch2 = gen_reg_rtx (Pmode);
23068 scratch3 = gen_reg_rtx (Pmode);
23069 scratch4 = force_reg (Pmode, constm1_rtx);
23071 emit_move_insn (scratch3, addr);
23072 eoschar = force_reg (QImode, eoschar);
23074 src = replace_equiv_address_nv (src, scratch3);
23076 /* If .md starts supporting :P, this can be done in .md. */
23077 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
23078 scratch4), UNSPEC_SCAS);
23079 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
23080 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
23081 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
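/* Note on the arithmetic above: SCRATCH4 preloads the count register
   with -1, and the scan decrements it once for every byte examined,
   including the terminator, leaving it equal to -2 - len.  The one's
   complement followed by the add of -1 therefore computes
   ~(-2 - len) - 1 == (len + 1) - 1 == len.  */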
23086 /* For a given symbol (function), construct code to compute the address of its
23087 PLT entry in the large x86-64 PIC model. */
23089 construct_plt_address (rtx symbol)
23091 rtx tmp = gen_reg_rtx (Pmode);
23092 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
23094 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
23095 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
23097 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
23098 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
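/* The sequence built above corresponds roughly to (illustrative AT&T
   syntax; the temporary and the PIC base register are whatever the
   register allocator and pic_offset_table_rtx provide):

     movabs $symbol@PLTOFF, %tmp
     add    %pic_base, %tmp         # %tmp now addresses the PLT entry
*/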
23103 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
23105 rtx pop, bool sibcall)
23107 /* We need to represent that SI and DI registers are clobbered
23109 static int clobbered_registers[] = {
23110 XMM6_REG, XMM7_REG, XMM8_REG,
23111 XMM9_REG, XMM10_REG, XMM11_REG,
23112 XMM12_REG, XMM13_REG, XMM14_REG,
23113 XMM15_REG, SI_REG, DI_REG
23115 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
23116 rtx use = NULL, call;
23117 unsigned int vec_len;
23119 if (pop == const0_rtx)
23121 gcc_assert (!TARGET_64BIT || !pop);
23123 if (TARGET_MACHO && !TARGET_64BIT)
23126 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
23127 fnaddr = machopic_indirect_call_target (fnaddr);
23132 /* Static functions and indirect calls don't need the pic register. */
23133 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
23134 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23135 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
23136 use_reg (&use, pic_offset_table_rtx);
23139 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
23141 rtx al = gen_rtx_REG (QImode, AX_REG);
23142 emit_move_insn (al, callarg2);
23143 use_reg (&use, al);
23146 if (ix86_cmodel == CM_LARGE_PIC
23148 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23149 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
23150 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
23152 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
23153 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
23155 fnaddr = XEXP (fnaddr, 0);
23156 if (GET_MODE (fnaddr) != Pmode)
23157 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
23158 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
23162 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
23164 call = gen_rtx_SET (VOIDmode, retval, call);
23165 vec[vec_len++] = call;
23169 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
23170 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
23171 vec[vec_len++] = pop;
23174 if (TARGET_64BIT_MS_ABI
23175 && (!callarg2 || INTVAL (callarg2) != -2))
23179 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
23180 UNSPEC_MS_TO_SYSV_CALL);
23182 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
23184 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
23186 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
23188 clobbered_registers[i]));
23191 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
23192 if (TARGET_VZEROUPPER)
23195 if (cfun->machine->callee_pass_avx256_p)
23197 if (cfun->machine->callee_return_avx256_p)
23198 avx256 = callee_return_pass_avx256;
23200 avx256 = callee_pass_avx256;
23202 else if (cfun->machine->callee_return_avx256_p)
23203 avx256 = callee_return_avx256;
23205 avx256 = call_no_avx256;
23207 if (reload_completed)
23208 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
23210 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
23211 gen_rtvec (1, GEN_INT (avx256)),
23212 UNSPEC_CALL_NEEDS_VZEROUPPER);
23216 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
23217 call = emit_call_insn (call);
23219 CALL_INSN_FUNCTION_USAGE (call) = use;
23225 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
23227 rtx pat = PATTERN (insn);
23228 rtvec vec = XVEC (pat, 0);
23229 int len = GET_NUM_ELEM (vec) - 1;
23231 /* Strip off the last entry of the parallel. */
23232 gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
23233 gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
23235 pat = RTVEC_ELT (vec, 0);
23237 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
23239 emit_insn (gen_avx_vzeroupper (vzeroupper));
23240 emit_call_insn (pat);
23243 /* Output the assembly for a call instruction. */
23246 ix86_output_call_insn (rtx insn, rtx call_op)
23248 bool direct_p = constant_call_address_operand (call_op, Pmode);
23249 bool seh_nop_p = false;
23252 if (SIBLING_CALL_P (insn))
23256 /* SEH epilogue detection requires the indirect branch case
23257 to include REX.W. */
23258 else if (TARGET_SEH)
23259 xasm = "rex.W jmp %A0";
23263 output_asm_insn (xasm, &call_op);
23267 /* SEH unwinding can require an extra nop to be emitted in several
23268 circumstances. Determine if we have one of those. */
23273 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
23275 /* If we get to another real insn, we don't need the nop. */
23279 /* If we get to the epilogue note, prevent a catch region from
23280 being adjacent to the standard epilogue sequence. If non-
23281 call-exceptions, we'll have done this during epilogue emission. */
23282 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
23283 && !flag_non_call_exceptions
23284 && !can_throw_internal (insn))
23291 /* If we didn't find a real insn following the call, prevent the
23292 unwinder from looking into the next function. */
23298 xasm = "call\t%P0";
23300 xasm = "call\t%A0";
23302 output_asm_insn (xasm, &call_op);
23310 /* Clear stack slot assignments remembered from previous functions.
23311 This is called from INIT_EXPANDERS once before RTL is emitted for each
23314 static struct machine_function *
23315 ix86_init_machine_status (void)
23317 struct machine_function *f;
23319 f = ggc_alloc_cleared_machine_function ();
23320 f->use_fast_prologue_epilogue_nregs = -1;
23321 f->call_abi = ix86_abi;
23326 /* Return a MEM corresponding to a stack slot with mode MODE.
23327 Allocate a new slot if necessary.
23329 The RTL for a function can have several slots available: N is
23330 which slot to use. */
23333 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
23335 struct stack_local_entry *s;
23337 gcc_assert (n < MAX_386_STACK_LOCALS);
23339 for (s = ix86_stack_locals; s; s = s->next)
23340 if (s->mode == mode && s->n == n)
23341 return validize_mem (copy_rtx (s->rtl));
23343 s = ggc_alloc_stack_local_entry ();
23346 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
23348 s->next = ix86_stack_locals;
23349 ix86_stack_locals = s;
23350 return validize_mem (s->rtl);
23354 ix86_instantiate_decls (void)
23356 struct stack_local_entry *s;
23358 for (s = ix86_stack_locals; s; s = s->next)
23359 if (s->rtl != NULL_RTX)
23360 instantiate_decl_rtl (s->rtl);
23363 /* Calculate the length of the memory address in the instruction encoding.
23364 This includes the addr32 prefix, but does not include the one-byte modrm,
23365 opcode, or other prefixes. We never generate an addr32 prefix for an LEA insn. */
23368 memory_address_length (rtx addr, bool lea)
23370 struct ix86_address parts;
23371 rtx base, index, disp;
23375 if (GET_CODE (addr) == PRE_DEC
23376 || GET_CODE (addr) == POST_INC
23377 || GET_CODE (addr) == PRE_MODIFY
23378 || GET_CODE (addr) == POST_MODIFY)
23381 ok = ix86_decompose_address (addr, &parts);
23384 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
23386 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
23387 if (TARGET_64BIT && !lea
23388 && (SImode_address_operand (addr, VOIDmode)
23389 || (parts.base && GET_MODE (parts.base) == SImode)
23390 || (parts.index && GET_MODE (parts.index) == SImode)))
23394 index = parts.index;
23397 if (base && GET_CODE (base) == SUBREG)
23398 base = SUBREG_REG (base);
23399 if (index && GET_CODE (index) == SUBREG)
23400 index = SUBREG_REG (index);
23402 gcc_assert (base == NULL_RTX || REG_P (base));
23403 gcc_assert (index == NULL_RTX || REG_P (index));
23406 - esp as the base always wants an index,
23407 - ebp as the base always wants a displacement,
23408 - r12 as the base always wants an index,
23409 - r13 as the base always wants a displacement. */
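/* Examples of the rules above, in 32-bit mode (illustrative encodings
   for "mov (mem), %ecx"):

     mov (%eax), %ecx    8b 08       modrm only
     mov (%esp), %ecx    8b 0c 24    modrm + SIB: esp wants an index
     mov (%ebp), %ecx    8b 4d 00    modrm + disp8: ebp wants a disp  */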
23411 /* Register Indirect. */
23412 if (base && !index && !disp)
23414 /* esp (for its index) and ebp (for its displacement) need
23415 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
23417 if (base == arg_pointer_rtx
23418 || base == frame_pointer_rtx
23419 || REGNO (base) == SP_REG
23420 || REGNO (base) == BP_REG
23421 || REGNO (base) == R12_REG
23422 || REGNO (base) == R13_REG)
23426 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
23427 is not disp32, but disp32(%rip), so for disp32
23428 a SIB byte is needed, unless print_operand_address
23429 optimizes it into disp32(%rip) or (%rip) is implied
23431 else if (disp && !base && !index)
23438 if (GET_CODE (disp) == CONST)
23439 symbol = XEXP (disp, 0);
23440 if (GET_CODE (symbol) == PLUS
23441 && CONST_INT_P (XEXP (symbol, 1)))
23442 symbol = XEXP (symbol, 0);
23444 if (GET_CODE (symbol) != LABEL_REF
23445 && (GET_CODE (symbol) != SYMBOL_REF
23446 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
23447 && (GET_CODE (symbol) != UNSPEC
23448 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
23449 && XINT (symbol, 1) != UNSPEC_PCREL
23450 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
23456 /* Find the length of the displacement constant. */
23459 if (base && satisfies_constraint_K (disp))
23464 /* ebp always wants a displacement. Similarly r13. */
23465 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
23468 /* An index requires the two-byte modrm form.... */
23470 /* ...like esp (or r12), which always wants an index. */
23471 || base == arg_pointer_rtx
23472 || base == frame_pointer_rtx
23473 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
23480 /* Compute the default value for the "length_immediate" attribute. When
23481 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
23483 ix86_attr_length_immediate_default (rtx insn, bool shortform)
23487 extract_insn_cached (insn);
23488 for (i = recog_data.n_operands - 1; i >= 0; --i)
23489 if (CONSTANT_P (recog_data.operand[i]))
23491 enum attr_mode mode = get_attr_mode (insn);
23494 if (shortform && CONST_INT_P (recog_data.operand[i]))
23496 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
23503 ival = trunc_int_for_mode (ival, HImode);
23506 ival = trunc_int_for_mode (ival, SImode);
23511 if (IN_RANGE (ival, -128, 127))
23528 /* Immediates for DImode instructions are encoded
23529 as 32-bit sign-extended values. */
23534 fatal_insn ("unknown insn mode", insn);
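/* Example of the short form handled above: "add $1, %eax" can be
   encoded as 83 c0 01 (8-bit sign-extended immediate, so
   length_immediate is 1) instead of 81 c0 01 00 00 00 (32-bit
   immediate, length 4), because 1 fits into [-128, 127].  */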
23540 /* Compute the default value for the "length_address" attribute. */
23542 ix86_attr_length_address_default (rtx insn)
23546 if (get_attr_type (insn) == TYPE_LEA)
23548 rtx set = PATTERN (insn), addr;
23550 if (GET_CODE (set) == PARALLEL)
23551 set = XVECEXP (set, 0, 0);
23553 gcc_assert (GET_CODE (set) == SET);
23555 addr = SET_SRC (set);
23557 return memory_address_length (addr, true);
23560 extract_insn_cached (insn);
23561 for (i = recog_data.n_operands - 1; i >= 0; --i)
23562 if (MEM_P (recog_data.operand[i]))
23564 constrain_operands_cached (reload_completed);
23565 if (which_alternative != -1)
23567 const char *constraints = recog_data.constraints[i];
23568 int alt = which_alternative;
23570 while (*constraints == '=' || *constraints == '+')
23573 while (*constraints++ != ',')
23575 /* Skip ignored operands. */
23576 if (*constraints == 'X')
23579 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
23584 /* Compute the default value for the "length_vex" attribute. It includes
23585 the 2- or 3-byte VEX prefix and 1 opcode byte. */
23588 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
23592 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX W
23593 bit requires the 3-byte VEX prefix. */
23594 if (!has_0f_opcode || has_vex_w)
23597 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
23601 extract_insn_cached (insn);
23603 for (i = recog_data.n_operands - 1; i >= 0; --i)
23604 if (REG_P (recog_data.operand[i]))
23606 /* The REX.W bit requires the 3-byte VEX prefix. */
23607 if (GET_MODE (recog_data.operand[i]) == DImode
23608 && GENERAL_REG_P (recog_data.operand[i]))
23613 /* The REX.X or REX.B bits require the 3-byte VEX prefix. */
23614 if (MEM_P (recog_data.operand[i])
23615 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
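/* Example of the distinction above: a VEX insn from the 0f opcode map
   using only low registers, such as "vaddps %xmm2, %xmm1, %xmm0",
   needs only the 2-byte c5 prefix (2 + 1 opcode byte = 3), while any
   use of VEX.W, an extended register in the REX.X/REX.B positions, or
   the 0f38/0f3a opcode maps forces the 3-byte c4 prefix (3 + 1 = 4).  */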
23622 /* Return the maximum number of instructions a CPU can issue. */
23625 ix86_issue_rate (void)
23629 case PROCESSOR_PENTIUM:
23630 case PROCESSOR_ATOM:
23634 case PROCESSOR_PENTIUMPRO:
23635 case PROCESSOR_PENTIUM4:
23636 case PROCESSOR_CORE2_32:
23637 case PROCESSOR_CORE2_64:
23638 case PROCESSOR_COREI7_32:
23639 case PROCESSOR_COREI7_64:
23640 case PROCESSOR_ATHLON:
23642 case PROCESSOR_AMDFAM10:
23643 case PROCESSOR_NOCONA:
23644 case PROCESSOR_GENERIC32:
23645 case PROCESSOR_GENERIC64:
23646 case PROCESSOR_BDVER1:
23647 case PROCESSOR_BDVER2:
23648 case PROCESSOR_BTVER1:
23656 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
23657 set by DEP_INSN and nothing else set by DEP_INSN. */
23660 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
23664 /* Simplify the test for uninteresting insns. */
23665 if (insn_type != TYPE_SETCC
23666 && insn_type != TYPE_ICMOV
23667 && insn_type != TYPE_FCMOV
23668 && insn_type != TYPE_IBR)
23671 if ((set = single_set (dep_insn)) != 0)
23673 set = SET_DEST (set);
23676 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
23677 && XVECLEN (PATTERN (dep_insn), 0) == 2
23678 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
23679 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
23681 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
23682 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
23687 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
23690 /* This test is true if the dependent insn reads the flags but
23691 not any other potentially set register. */
23692 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
23695 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
23701 /* Return true iff USE_INSN has a memory address with operands set by
23705 ix86_agi_dependent (rtx set_insn, rtx use_insn)
23708 extract_insn_cached (use_insn);
23709 for (i = recog_data.n_operands - 1; i >= 0; --i)
23710 if (MEM_P (recog_data.operand[i]))
23712 rtx addr = XEXP (recog_data.operand[i], 0);
23713 return modified_in_p (addr, set_insn) != 0;
23719 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
23721 enum attr_type insn_type, dep_insn_type;
23722 enum attr_memory memory;
23724 int dep_insn_code_number;
23726 /* Anti and output dependencies have zero cost on all CPUs. */
23727 if (REG_NOTE_KIND (link) != 0)
23730 dep_insn_code_number = recog_memoized (dep_insn);
23732 /* If we can't recognize the insns, we can't really do anything. */
23733 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
23736 insn_type = get_attr_type (insn);
23737 dep_insn_type = get_attr_type (dep_insn);
23741 case PROCESSOR_PENTIUM:
23742 /* Address Generation Interlock adds a cycle of latency. */
23743 if (insn_type == TYPE_LEA)
23745 rtx addr = PATTERN (insn);
23747 if (GET_CODE (addr) == PARALLEL)
23748 addr = XVECEXP (addr, 0, 0);
23750 gcc_assert (GET_CODE (addr) == SET);
23752 addr = SET_SRC (addr);
23753 if (modified_in_p (addr, dep_insn))
23756 else if (ix86_agi_dependent (dep_insn, insn))
23759 /* ??? Compares pair with jump/setcc. */
23760 if (ix86_flags_dependent (insn, dep_insn, insn_type))
23763 /* Floating point stores require the value to be ready one cycle earlier. */
23764 if (insn_type == TYPE_FMOV
23765 && get_attr_memory (insn) == MEMORY_STORE
23766 && !ix86_agi_dependent (dep_insn, insn))
23770 case PROCESSOR_PENTIUMPRO:
23771 memory = get_attr_memory (insn);
23773 /* INT->FP conversion is expensive. */
23774 if (get_attr_fp_int_src (dep_insn))
23777 /* There is one cycle extra latency between an FP op and a store. */
23778 if (insn_type == TYPE_FMOV
23779 && (set = single_set (dep_insn)) != NULL_RTX
23780 && (set2 = single_set (insn)) != NULL_RTX
23781 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
23782 && MEM_P (SET_DEST (set2)))
23785 /* Show the ability of the reorder buffer to hide the latency of a load
23786 by executing it in parallel with the previous instruction when the
23787 previous instruction is not needed to compute the address. */
23788 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23789 && !ix86_agi_dependent (dep_insn, insn))
23791 /* Claim that moves take one cycle, as the core can issue one load
23792 at a time and the next load can start a cycle later. */
23793 if (dep_insn_type == TYPE_IMOV
23794 || dep_insn_type == TYPE_FMOV)
23802 memory = get_attr_memory (insn);
23804 /* The esp dependency is resolved before the instruction is really
23806 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
23807 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
23810 /* INT->FP conversion is expensive. */
23811 if (get_attr_fp_int_src (dep_insn))
23814 /* Show the ability of the reorder buffer to hide the latency of a load
23815 by executing it in parallel with the previous instruction when the
23816 previous instruction is not needed to compute the address. */
23817 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23818 && !ix86_agi_dependent (dep_insn, insn))
23820 /* Claim that moves take one cycle, as the core can issue one load
23821 at a time and the next load can start a cycle later. */
23822 if (dep_insn_type == TYPE_IMOV
23823 || dep_insn_type == TYPE_FMOV)
23832 case PROCESSOR_ATHLON:
23834 case PROCESSOR_AMDFAM10:
23835 case PROCESSOR_BDVER1:
23836 case PROCESSOR_BDVER2:
23837 case PROCESSOR_BTVER1:
23838 case PROCESSOR_ATOM:
23839 case PROCESSOR_GENERIC32:
23840 case PROCESSOR_GENERIC64:
23841 memory = get_attr_memory (insn);
23843 /* Show the ability of the reorder buffer to hide the latency of a load
23844 by executing it in parallel with the previous instruction when the
23845 previous instruction is not needed to compute the address. */
23846 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23847 && !ix86_agi_dependent (dep_insn, insn))
23849 enum attr_unit unit = get_attr_unit (insn);
23852 /* Because of the difference between the lengths of the integer and
23853 floating unit pipeline preparation stages, the memory operands
23854 for floating point are cheaper.
23856 ??? For Athlon the difference is most probably 2. */
23857 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
23860 loadcost = TARGET_ATHLON ? 2 : 0;
23862 if (cost >= loadcost)
23875 /* How many alternative schedules to try. This should be as wide as the
23876 scheduling freedom in the DFA, but no wider. Making this value too
23877 large results in extra work for the scheduler. */
23880 ia32_multipass_dfa_lookahead (void)
23884 case PROCESSOR_PENTIUM:
23887 case PROCESSOR_PENTIUMPRO:
23891 case PROCESSOR_CORE2_32:
23892 case PROCESSOR_CORE2_64:
23893 case PROCESSOR_COREI7_32:
23894 case PROCESSOR_COREI7_64:
23895 case PROCESSOR_ATOM:
23896 /* Generally, we want haifa-sched:max_issue() to look ahead as far
23897 as many instructions can be executed on a cycle, i.e.,
23898 issue_rate. I wonder why tuning for many CPUs does not do this. */
23899 return ix86_issue_rate ();
23908 /* Model the decoder of Core 2/i7.
23909 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
23910 track the instruction fetch block boundaries and make sure that long
23911 (9+ byte) instructions are assigned to decoder D0. */
23913 /* Maximum length of an insn that can be handled by
23914 a secondary decoder unit. '8' for Core 2/i7. */
23915 static int core2i7_secondary_decoder_max_insn_size;
23917 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
23918 '16' for Core 2/i7. */
23919 static int core2i7_ifetch_block_size;
23921 /* Maximum number of instructions decoder can handle per cycle.
23922 '6' for Core 2/i7. */
23923 static int core2i7_ifetch_block_max_insns;
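/* Worked example of the model: with a 16-byte ifetch block, insns of
   sizes 5, 5 and 7 cannot all issue in one cycle -- after the two
   5-byte insns only 6 bytes remain, so the 7-byte insn is deferred to
   the next cycle even though the 6-insn decoder limit is not reached.  */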
23925 typedef struct ix86_first_cycle_multipass_data_ *
23926 ix86_first_cycle_multipass_data_t;
23927 typedef const struct ix86_first_cycle_multipass_data_ *
23928 const_ix86_first_cycle_multipass_data_t;
23930 /* A variable to store target state across calls to max_issue within
23932 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
23933 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
23935 /* Initialize DATA. */
23937 core2i7_first_cycle_multipass_init (void *_data)
23939 ix86_first_cycle_multipass_data_t data
23940 = (ix86_first_cycle_multipass_data_t) _data;
23942 data->ifetch_block_len = 0;
23943 data->ifetch_block_n_insns = 0;
23944 data->ready_try_change = NULL;
23945 data->ready_try_change_size = 0;
23948 /* Advancing the cycle; reset ifetch block counts. */
23950 core2i7_dfa_post_advance_cycle (void)
23952 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
23954 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
23956 data->ifetch_block_len = 0;
23957 data->ifetch_block_n_insns = 0;
23960 static int min_insn_size (rtx);
23962 /* Filter out insns from ready_try that the core will not be able to issue
23963 on the current cycle due to decoder restrictions. */
23965 core2i7_first_cycle_multipass_filter_ready_try
23966 (const_ix86_first_cycle_multipass_data_t data,
23967 char *ready_try, int n_ready, bool first_cycle_insn_p)
23974 if (ready_try[n_ready])
23977 insn = get_ready_element (n_ready);
23978 insn_size = min_insn_size (insn);
23980 if (/* If this is too long an insn for a secondary decoder ... */
23981 (!first_cycle_insn_p
23982 && insn_size > core2i7_secondary_decoder_max_insn_size)
23983 /* ... or it would not fit into the ifetch block ... */
23984 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
23985 /* ... or the decoder is full already ... */
23986 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
23987 /* ... mask the insn out. */
23989 ready_try[n_ready] = 1;
23991 if (data->ready_try_change)
23992 SET_BIT (data->ready_try_change, n_ready);
23997 /* Prepare for a new round of multipass lookahead scheduling. */
23999 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
24000 bool first_cycle_insn_p)
24002 ix86_first_cycle_multipass_data_t data
24003 = (ix86_first_cycle_multipass_data_t) _data;
24004 const_ix86_first_cycle_multipass_data_t prev_data
24005 = ix86_first_cycle_multipass_data;
24007 /* Restore the state from the end of the previous round. */
24008 data->ifetch_block_len = prev_data->ifetch_block_len;
24009 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
24011 /* Filter instructions that cannot be issued on current cycle due to
24012 decoder restrictions. */
24013 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
24014 first_cycle_insn_p);
24017 /* INSN is being issued in current solution. Account for its impact on
24018 the decoder model. */
24020 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
24021 rtx insn, const void *_prev_data)
24023 ix86_first_cycle_multipass_data_t data
24024 = (ix86_first_cycle_multipass_data_t) _data;
24025 const_ix86_first_cycle_multipass_data_t prev_data
24026 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
24028 int insn_size = min_insn_size (insn);
24030 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
24031 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
24032 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
24033 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
24035 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
24036 if (!data->ready_try_change)
24038 data->ready_try_change = sbitmap_alloc (n_ready);
24039 data->ready_try_change_size = n_ready;
24041 else if (data->ready_try_change_size < n_ready)
24043 data->ready_try_change = sbitmap_resize (data->ready_try_change,
24045 data->ready_try_change_size = n_ready;
24047 sbitmap_zero (data->ready_try_change);
24049 /* Filter out insns from ready_try that the core will not be able to issue
24050 on the current cycle due to decoder restrictions. */
24051 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
24055 /* Revert the effect on ready_try. */
24057 core2i7_first_cycle_multipass_backtrack (const void *_data,
24059 int n_ready ATTRIBUTE_UNUSED)
24061 const_ix86_first_cycle_multipass_data_t data
24062 = (const_ix86_first_cycle_multipass_data_t) _data;
24063 unsigned int i = 0;
24064 sbitmap_iterator sbi;
24066 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
24067 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
24073 /* Save the result of multipass lookahead scheduling for the next round. */
24075 core2i7_first_cycle_multipass_end (const void *_data)
24077 const_ix86_first_cycle_multipass_data_t data
24078 = (const_ix86_first_cycle_multipass_data_t) _data;
24079 ix86_first_cycle_multipass_data_t next_data
24080 = ix86_first_cycle_multipass_data;
24084 next_data->ifetch_block_len = data->ifetch_block_len;
24085 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
24089 /* Deallocate target data. */
24091 core2i7_first_cycle_multipass_fini (void *_data)
24093 ix86_first_cycle_multipass_data_t data
24094 = (ix86_first_cycle_multipass_data_t) _data;
24096 if (data->ready_try_change)
24098 sbitmap_free (data->ready_try_change);
24099 data->ready_try_change = NULL;
24100 data->ready_try_change_size = 0;
24104 /* Prepare for scheduling pass. */
24106 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
24107 int verbose ATTRIBUTE_UNUSED,
24108 int max_uid ATTRIBUTE_UNUSED)
24110 /* Install scheduling hooks for current CPU. Some of these hooks are used
24111 in time-critical parts of the scheduler, so we only set them up when
24112 they are actually used. */
24115 case PROCESSOR_CORE2_32:
24116 case PROCESSOR_CORE2_64:
24117 case PROCESSOR_COREI7_32:
24118 case PROCESSOR_COREI7_64:
24119 targetm.sched.dfa_post_advance_cycle
24120 = core2i7_dfa_post_advance_cycle;
24121 targetm.sched.first_cycle_multipass_init
24122 = core2i7_first_cycle_multipass_init;
24123 targetm.sched.first_cycle_multipass_begin
24124 = core2i7_first_cycle_multipass_begin;
24125 targetm.sched.first_cycle_multipass_issue
24126 = core2i7_first_cycle_multipass_issue;
24127 targetm.sched.first_cycle_multipass_backtrack
24128 = core2i7_first_cycle_multipass_backtrack;
24129 targetm.sched.first_cycle_multipass_end
24130 = core2i7_first_cycle_multipass_end;
24131 targetm.sched.first_cycle_multipass_fini
24132 = core2i7_first_cycle_multipass_fini;
24134 /* Set decoder parameters. */
24135 core2i7_secondary_decoder_max_insn_size = 8;
24136 core2i7_ifetch_block_size = 16;
24137 core2i7_ifetch_block_max_insns = 6;
24138 break;
24140 default:
24141 targetm.sched.dfa_post_advance_cycle = NULL;
24142 targetm.sched.first_cycle_multipass_init = NULL;
24143 targetm.sched.first_cycle_multipass_begin = NULL;
24144 targetm.sched.first_cycle_multipass_issue = NULL;
24145 targetm.sched.first_cycle_multipass_backtrack = NULL;
24146 targetm.sched.first_cycle_multipass_end = NULL;
24147 targetm.sched.first_cycle_multipass_fini = NULL;
24148 break;
24149 }
24150 }
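/* A sketch (illustrative only, not GCC code) of the decoder constraint the
   parameters above encode: an insn joins the current fetch block only while
   both the 16-byte and the 6-insn limits hold, mirroring the assertions in
   core2i7_first_cycle_multipass_issue. */
#if 0
static int
example_fits_ifetch_block (int block_len, int block_n_insns, int insn_size)
{
  const int ifetch_block_size = 16;	  /* bytes per fetch block */
  const int ifetch_block_max_insns = 6;	  /* insns per fetch block */

  return (block_len + insn_size <= ifetch_block_size
	  && block_n_insns + 1 <= ifetch_block_max_insns);
}
#endif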
24153 /* Compute the alignment given to a constant that is being placed in memory.
24154 EXP is the constant and ALIGN is the alignment that the object would
24155 ordinarily have.
24156 The value of this function is used instead of that alignment to align
24157 the object. */
24160 ix86_constant_alignment (tree exp, int align)
24161 {
24162 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
24163 || TREE_CODE (exp) == INTEGER_CST)
24164 {
24165 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
24166 return 64;
24167 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
24168 return 128;
24169 }
24170 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
24171 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
24172 return BITS_PER_WORD;
24174 return align;
24175 }
24177 /* Compute the alignment for a static variable.
24178 TYPE is the data type, and ALIGN is the alignment that
24179 the object would ordinarily have. The value of this function is used
24180 instead of that alignment to align the object. */
24183 ix86_data_alignment (tree type, int align)
24184 {
24185 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
24187 if (AGGREGATE_TYPE_P (type)
24188 && TYPE_SIZE (type)
24189 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
24190 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
24191 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
24192 && align < max_align)
24193 align = max_align;
24195 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
24196 to a 16-byte boundary. */
24197 if (TARGET_64BIT)
24198 {
24199 if (AGGREGATE_TYPE_P (type)
24200 && TYPE_SIZE (type)
24201 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
24202 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
24203 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
24204 return 128;
24205 }
24207 if (TREE_CODE (type) == ARRAY_TYPE)
24208 {
24209 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
24210 return 64;
24211 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
24212 return 128;
24213 }
24214 else if (TREE_CODE (type) == COMPLEX_TYPE)
24215 {
24217 if (TYPE_MODE (type) == DCmode && align < 64)
24218 return 64;
24219 if ((TYPE_MODE (type) == XCmode
24220 || TYPE_MODE (type) == TCmode) && align < 128)
24221 return 128;
24222 }
24223 else if ((TREE_CODE (type) == RECORD_TYPE
24224 || TREE_CODE (type) == UNION_TYPE
24225 || TREE_CODE (type) == QUAL_UNION_TYPE)
24226 && TYPE_FIELDS (type))
24227 {
24228 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
24229 return 64;
24230 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
24231 return 128;
24232 }
24233 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
24234 || TREE_CODE (type) == INTEGER_TYPE)
24235 {
24236 if (TYPE_MODE (type) == DFmode && align < 64)
24237 return 64;
24238 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
24239 return 128;
24240 }
24242 return align;
24243 }
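/* Worked example (illustrative sketch, not GCC code) of the rules above for
   an array of doubles on x86-64 at -O2: max_align is assumed to be 256 here,
   i.e. !optimize_size and a generous MAX_OFILE_ALIGNMENT. */
#if 0
static int
example_double_array_alignment (int nelems, int align)
{
  int max_align = 256;			/* assumes !optimize_size */
  int size_bits = nelems * 64;		/* each double is 64 bits */

  if (size_bits >= max_align && align < max_align)
    return max_align;			/* large-aggregate rule */
  if (size_bits >= 128 && align < 128)
    return 128;				/* x86-64 >= 16-byte array rule */
  if (align < 64)
    return 64;				/* DFmode element rule */
  return align;
}
#endif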
24245 /* Compute the alignment for a local variable or a stack slot. EXP is
24246 the data type or decl itself, MODE is the widest mode available and
24247 ALIGN is the alignment that the object would ordinarily have. The
24248 value of this macro is used instead of that alignment to align the
24249 object. */
24252 ix86_local_alignment (tree exp, enum machine_mode mode,
24253 unsigned int align)
24254 {
24255 tree type, decl;
24257 if (exp && DECL_P (exp))
24258 {
24259 type = TREE_TYPE (exp);
24260 decl = exp;
24261 }
24262 else
24263 {
24264 type = exp;
24265 decl = NULL;
24266 }
24268 /* Don't do dynamic stack realignment for long long objects with
24269 -mpreferred-stack-boundary=2. */
24270 if (!TARGET_64BIT
24271 && align == 64
24272 && ix86_preferred_stack_boundary < 64
24273 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
24274 && (!type || !TYPE_USER_ALIGN (type))
24275 && (!decl || !DECL_USER_ALIGN (decl)))
24276 align = 32;
24278 /* If TYPE is NULL, we are allocating a stack slot for caller-save
24279 register in MODE. We will return the largest alignment of XF
24280 and DF. */
24281 if (!type)
24282 {
24283 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
24284 align = GET_MODE_ALIGNMENT (DFmode);
24285 return align;
24286 }
24288 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
24289 to a 16-byte boundary. Exact wording is:
24291 An array uses the same alignment as its elements, except that a local or
24292 global array variable of length at least 16 bytes or
24293 a C99 variable-length array variable always has alignment of at least 16 bytes.
24295 This was added to allow the use of aligned SSE instructions on arrays. This
24296 rule is meant for static storage (where the compiler cannot do the analysis
24297 by itself). We follow it for automatic variables only when convenient.
24298 We fully control everything in the function being compiled, and functions
24299 from other units cannot rely on the alignment.

24301 Exclude the va_list type. It is the common case of a local array where
24302 we cannot benefit from the alignment. */
24303 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
24304 && TARGET_SSE)
24305 {
24306 if (AGGREGATE_TYPE_P (type)
24307 && (va_list_type_node == NULL_TREE
24308 || (TYPE_MAIN_VARIANT (type)
24309 != TYPE_MAIN_VARIANT (va_list_type_node)))
24310 && TYPE_SIZE (type)
24311 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
24312 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
24313 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
24314 return 128;
24315 }
24316 if (TREE_CODE (type) == ARRAY_TYPE)
24317 {
24318 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
24319 return 64;
24320 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
24321 return 128;
24322 }
24323 else if (TREE_CODE (type) == COMPLEX_TYPE)
24324 {
24325 if (TYPE_MODE (type) == DCmode && align < 64)
24326 return 64;
24327 if ((TYPE_MODE (type) == XCmode
24328 || TYPE_MODE (type) == TCmode) && align < 128)
24329 return 128;
24330 }
24331 else if ((TREE_CODE (type) == RECORD_TYPE
24332 || TREE_CODE (type) == UNION_TYPE
24333 || TREE_CODE (type) == QUAL_UNION_TYPE)
24334 && TYPE_FIELDS (type))
24335 {
24336 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
24337 return 64;
24338 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
24339 return 128;
24340 }
24341 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
24342 || TREE_CODE (type) == INTEGER_TYPE)
24343 {
24345 if (TYPE_MODE (type) == DFmode && align < 64)
24346 return 64;
24347 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
24348 return 128;
24349 }
24351 return align;
24352 }
24353 /* Compute the minimum required alignment for dynamic stack realignment
24354 purposes for a local variable, parameter or a stack slot. EXP is
24355 the data type or decl itself, MODE is its mode and ALIGN is the
24356 alignment that the object would ordinarily have. */
24359 ix86_minimum_alignment (tree exp, enum machine_mode mode,
24360 unsigned int align)
24361 {
24362 tree type, decl;
24364 if (exp && DECL_P (exp))
24365 {
24366 type = TREE_TYPE (exp);
24367 decl = exp;
24368 }
24369 else
24370 {
24371 type = exp;
24372 decl = NULL;
24373 }
24375 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
24376 return align;
24378 /* Don't do dynamic stack realignment for long long objects with
24379 -mpreferred-stack-boundary=2. */
24380 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
24381 && (!type || !TYPE_USER_ALIGN (type))
24382 && (!decl || !DECL_USER_ALIGN (decl)))
24383 return 32;
24385 return align;
24386 }
24388 /* Find a location for the static chain incoming to a nested function.
24389 This is a register, unless all free registers are used by arguments. */
24392 ix86_static_chain (const_tree fndecl, bool incoming_p)
24393 {
24394 unsigned regno;
24396 if (!DECL_STATIC_CHAIN (fndecl))
24397 return NULL;
24399 if (TARGET_64BIT)
24400 {
24401 /* We always use R10 in 64-bit mode. */
24402 regno = R10_REG;
24403 }
24404 else
24405 {
24406 tree fntype;
24407 unsigned int ccvt;
24409 /* By default in 32-bit mode we use ECX to pass the static chain. */
24410 regno = CX_REG;
24412 fntype = TREE_TYPE (fndecl);
24413 ccvt = ix86_get_callcvt (fntype);
24414 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
24415 {
24416 /* Fastcall functions use ecx/edx for arguments, which leaves
24417 us with EAX for the static chain.
24418 Thiscall functions use ecx for arguments, which also
24419 leaves us with EAX for the static chain. */
24420 regno = AX_REG;
24421 }
24422 else if (ix86_function_regparm (fntype, fndecl) == 3)
24423 {
24424 /* For regparm 3, we have no free call-clobbered registers in
24425 which to store the static chain. In order to implement this,
24426 we have the trampoline push the static chain to the stack.
24427 However, we can't push a value below the return address when
24428 we call the nested function directly, so we have to use an
24429 alternate entry point. For this we use ESI, and have the
24430 alternate entry point push ESI, so that things appear the
24431 same once we're executing the nested function. */
24432 if (incoming_p)
24433 {
24434 if (fndecl == current_function_decl)
24435 ix86_static_chain_on_stack = true;
24436 return gen_frame_mem (SImode,
24437 plus_constant (arg_pointer_rtx, -8));
24438 }
24439 regno = SI_REG;
24440 }
24441 }
24443 return gen_rtx_REG (Pmode, regno);
24444 }
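/* Usage example (GNU C, hypothetical user code): a nested function that
   refers to its enclosing frame receives the frame pointer through the
   static chain register located above; taking the nested function's address
   forces a trampoline. */
#if 0
int
example_outer (int x)
{
  int nested (int y) { return x + y; }	/* reads EXAMPLE_OUTER's frame */
  int (*fp) (int) = nested;		/* address taken => trampoline */
  return fp (1);			/* calls through the trampoline */
}
#endif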
24446 /* Emit RTL insns to initialize the variable parts of a trampoline.
24447 FNDECL is the decl of the target address; M_TRAMP is a MEM for
24448 the trampoline, and CHAIN_VALUE is an RTX for the static chain
24449 to be passed to the target function. */
24452 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
24453 {
24454 rtx mem, fnaddr;
24455 int opcode;
24456 int offset = 0;
24458 fnaddr = XEXP (DECL_RTL (fndecl), 0);
24460 if (TARGET_64BIT)
24461 {
24462 int size;
24464 /* Load the function address to r11. Try to load address using
24465 the shorter movl instead of movabs. We may want to support
24466 movq for kernel mode, but the kernel does not use trampolines at
24467 the moment. */
24468 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
24469 {
24470 fnaddr = copy_to_mode_reg (DImode, fnaddr);
24472 mem = adjust_address (m_tramp, HImode, offset);
24473 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
24475 mem = adjust_address (m_tramp, SImode, offset + 2);
24476 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
24477 offset += 6;
24478 }
24479 else
24480 {
24481 mem = adjust_address (m_tramp, HImode, offset);
24482 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
24484 mem = adjust_address (m_tramp, DImode, offset + 2);
24485 emit_move_insn (mem, fnaddr);
24486 offset += 10;
24487 }
24489 /* Load static chain using movabs to r10. Use the
24490 shorter movl instead of movabs for x32. */
24491 if (TARGET_X32)
24492 {
24493 opcode = 0xba41;
24494 size = 6;
24495 }
24496 else
24497 {
24498 opcode = 0xba49;
24499 size = 10;
24500 }
24502 mem = adjust_address (m_tramp, HImode, offset);
24503 emit_move_insn (mem, gen_int_mode (opcode, HImode));
24505 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
24506 emit_move_insn (mem, chain_value);
24507 offset += size;
24509 /* Jump to r11; the last (unused) byte is a nop, only there to
24510 pad the write out to a single 32-bit store. */
24511 mem = adjust_address (m_tramp, SImode, offset);
24512 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
24513 offset += 4;
24514 }
24515 else
24516 {
24517 rtx disp, chain;
24519 /* Depending on the static chain location, either load a register
24520 with a constant, or push the constant to the stack. All of the
24521 instructions are the same size. */
24522 chain = ix86_static_chain (fndecl, true);
24523 if (REG_P (chain))
24524 {
24525 switch (REGNO (chain))
24526 {
24527 case AX_REG:
24528 opcode = 0xb8; break;
24529 case CX_REG:
24530 opcode = 0xb9; break;
24531 default:
24532 gcc_unreachable ();
24533 }
24534 }
24535 else
24536 opcode = 0x68;
24538 mem = adjust_address (m_tramp, QImode, offset);
24539 emit_move_insn (mem, gen_int_mode (opcode, QImode));
24541 mem = adjust_address (m_tramp, SImode, offset + 1);
24542 emit_move_insn (mem, chain_value);
24543 offset += 5;
24545 mem = adjust_address (m_tramp, QImode, offset);
24546 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
24548 mem = adjust_address (m_tramp, SImode, offset + 1);
24550 /* Compute offset from the end of the jmp to the target function.
24551 In the case in which the trampoline stores the static chain on
24552 the stack, we need to skip the first insn which pushes the
24553 (call-saved) register static chain; this push is 1 byte. */
24555 disp = expand_binop (SImode, sub_optab, fnaddr,
24556 plus_constant (XEXP (m_tramp, 0),
24557 offset - (MEM_P (chain) ? 1 : 0)),
24558 NULL_RTX, 1, OPTAB_DIRECT);
24559 emit_move_insn (mem, disp);
24560 offset += 5;
24561 }
24562 gcc_assert (offset <= TRAMPOLINE_SIZE);
24564 #ifdef HAVE_ENABLE_EXECUTE_STACK
24565 #ifdef CHECK_EXECUTE_STACK_ENABLED
24566 if (CHECK_EXECUTE_STACK_ENABLED)
24567 #endif
24568 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
24569 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
24570 #endif
24571 }
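/* Illustrative sketch (not GCC code) of the 64-bit trampoline laid out by
   the emit_move_insn calls above, written as raw bytes.  The 0xba49
   movabs-to-%r10 opcode for the static chain is inferred from the elided
   branch and is an assumption; the buffer and names are hypothetical. */
#if 0
#include <stdint.h>
#include <string.h>

static void
example_tramp64_bytes (uint8_t *buf, uint64_t fnaddr, uint64_t chain)
{
  size_t off = 0;

  buf[off++] = 0x49; buf[off++] = 0xbb;	/* 0xbb49: movabs $fnaddr, %r11 */
  memcpy (buf + off, &fnaddr, 8); off += 8;
  buf[off++] = 0x49; buf[off++] = 0xba;	/* 0xba49: movabs $chain, %r10 */
  memcpy (buf + off, &chain, 8); off += 8;
  buf[off++] = 0x49; buf[off++] = 0xff;	/* 0x90e3ff49 little-endian: */
  buf[off++] = 0xe3;			/* rex.B jmp *%r11 ... */
  buf[off++] = 0x90;			/* ... padded with a nop */
}
#endif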
24573 /* The following file contains several enumerations and data structures
24574 built from the definitions in i386-builtin-types.def. */
24576 #include "i386-builtin-types.inc"
24578 /* Table for the ix86 builtin non-function types. */
24579 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
24581 /* Retrieve an element from the above table, building some of
24582 the types lazily. */
24585 ix86_get_builtin_type (enum ix86_builtin_type tcode)
24586 {
24587 unsigned int index;
24588 tree type, itype;
24589 int quals;
24590 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
24592 type = ix86_builtin_type_tab[(int) tcode];
24593 if (type != NULL)
24594 return type;
24596 gcc_assert (tcode > IX86_BT_LAST_PRIM);
24597 if (tcode <= IX86_BT_LAST_VECT)
24598 {
24599 enum machine_mode mode;
24601 index = tcode - IX86_BT_LAST_PRIM - 1;
24602 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
24603 mode = ix86_builtin_type_vect_mode[index];
24605 type = build_vector_type_for_mode (itype, mode);
24606 }
24607 else
24608 {
24611 index = tcode - IX86_BT_LAST_VECT - 1;
24612 if (tcode <= IX86_BT_LAST_PTR)
24613 quals = TYPE_UNQUALIFIED;
24614 else
24615 quals = TYPE_QUAL_CONST;
24617 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
24618 if (quals != TYPE_UNQUALIFIED)
24619 itype = build_qualified_type (itype, quals);
24621 type = build_pointer_type (itype);
24622 }
24624 ix86_builtin_type_tab[(int) tcode] = type;
24625 return type;
24626 }
24628 /* Table for the ix86 builtin function types. */
24629 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
24631 /* Retrieve an element from the above table, building some of
24632 the types lazily. */
24635 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
24636 {
24637 tree type;
24639 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
24641 type = ix86_builtin_func_type_tab[(int) tcode];
24642 if (type != NULL)
24643 return type;
24645 if (tcode <= IX86_BT_LAST_FUNC)
24646 {
24647 unsigned start = ix86_builtin_func_start[(int) tcode];
24648 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
24649 tree rtype, atype, args = void_list_node;
24650 unsigned i;
24652 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
24653 for (i = after - 1; i > start; --i)
24654 {
24655 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
24656 args = tree_cons (NULL, atype, args);
24657 }
24659 type = build_function_type (rtype, args);
24660 }
24661 else
24662 {
24663 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
24664 enum ix86_builtin_func_type icode;
24666 icode = ix86_builtin_func_alias_base[index];
24667 type = ix86_get_builtin_func_type (icode);
24668 }
24670 ix86_builtin_func_type_tab[(int) tcode] = type;
24671 return type;
24672 }
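/* Both lookup routines above use the same memoization pattern: probe the
   GC-rooted table, build the tree lazily on a miss, store it back, and
   return it.  A minimal generic sketch of the pattern (illustrative only;
   N_CODES and build_expensive_tree are hypothetical): */
#if 0
static tree example_cache_tab[N_CODES];

static tree
example_get_cached (int code)
{
  tree t = example_cache_tab[code];
  if (t != NULL)
    return t;				/* hit: already built */
  t = build_expensive_tree (code);	/* miss: build lazily */
  example_cache_tab[code] = t;		/* memoize for next time */
  return t;
}
#endif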
24675 /* Codes for all the SSE/MMX builtins. */
24676 enum ix86_builtins
24677 {
24678 IX86_BUILTIN_ADDPS,
24679 IX86_BUILTIN_ADDSS,
24680 IX86_BUILTIN_DIVPS,
24681 IX86_BUILTIN_DIVSS,
24682 IX86_BUILTIN_MULPS,
24683 IX86_BUILTIN_MULSS,
24684 IX86_BUILTIN_SUBPS,
24685 IX86_BUILTIN_SUBSS,
24687 IX86_BUILTIN_CMPEQPS,
24688 IX86_BUILTIN_CMPLTPS,
24689 IX86_BUILTIN_CMPLEPS,
24690 IX86_BUILTIN_CMPGTPS,
24691 IX86_BUILTIN_CMPGEPS,
24692 IX86_BUILTIN_CMPNEQPS,
24693 IX86_BUILTIN_CMPNLTPS,
24694 IX86_BUILTIN_CMPNLEPS,
24695 IX86_BUILTIN_CMPNGTPS,
24696 IX86_BUILTIN_CMPNGEPS,
24697 IX86_BUILTIN_CMPORDPS,
24698 IX86_BUILTIN_CMPUNORDPS,
24699 IX86_BUILTIN_CMPEQSS,
24700 IX86_BUILTIN_CMPLTSS,
24701 IX86_BUILTIN_CMPLESS,
24702 IX86_BUILTIN_CMPNEQSS,
24703 IX86_BUILTIN_CMPNLTSS,
24704 IX86_BUILTIN_CMPNLESS,
24705 IX86_BUILTIN_CMPNGTSS,
24706 IX86_BUILTIN_CMPNGESS,
24707 IX86_BUILTIN_CMPORDSS,
24708 IX86_BUILTIN_CMPUNORDSS,
24710 IX86_BUILTIN_COMIEQSS,
24711 IX86_BUILTIN_COMILTSS,
24712 IX86_BUILTIN_COMILESS,
24713 IX86_BUILTIN_COMIGTSS,
24714 IX86_BUILTIN_COMIGESS,
24715 IX86_BUILTIN_COMINEQSS,
24716 IX86_BUILTIN_UCOMIEQSS,
24717 IX86_BUILTIN_UCOMILTSS,
24718 IX86_BUILTIN_UCOMILESS,
24719 IX86_BUILTIN_UCOMIGTSS,
24720 IX86_BUILTIN_UCOMIGESS,
24721 IX86_BUILTIN_UCOMINEQSS,
24723 IX86_BUILTIN_CVTPI2PS,
24724 IX86_BUILTIN_CVTPS2PI,
24725 IX86_BUILTIN_CVTSI2SS,
24726 IX86_BUILTIN_CVTSI642SS,
24727 IX86_BUILTIN_CVTSS2SI,
24728 IX86_BUILTIN_CVTSS2SI64,
24729 IX86_BUILTIN_CVTTPS2PI,
24730 IX86_BUILTIN_CVTTSS2SI,
24731 IX86_BUILTIN_CVTTSS2SI64,
24733 IX86_BUILTIN_MAXPS,
24734 IX86_BUILTIN_MAXSS,
24735 IX86_BUILTIN_MINPS,
24736 IX86_BUILTIN_MINSS,
24738 IX86_BUILTIN_LOADUPS,
24739 IX86_BUILTIN_STOREUPS,
24740 IX86_BUILTIN_MOVSS,
24742 IX86_BUILTIN_MOVHLPS,
24743 IX86_BUILTIN_MOVLHPS,
24744 IX86_BUILTIN_LOADHPS,
24745 IX86_BUILTIN_LOADLPS,
24746 IX86_BUILTIN_STOREHPS,
24747 IX86_BUILTIN_STORELPS,
24749 IX86_BUILTIN_MASKMOVQ,
24750 IX86_BUILTIN_MOVMSKPS,
24751 IX86_BUILTIN_PMOVMSKB,
24753 IX86_BUILTIN_MOVNTPS,
24754 IX86_BUILTIN_MOVNTQ,
24756 IX86_BUILTIN_LOADDQU,
24757 IX86_BUILTIN_STOREDQU,
24759 IX86_BUILTIN_PACKSSWB,
24760 IX86_BUILTIN_PACKSSDW,
24761 IX86_BUILTIN_PACKUSWB,
24763 IX86_BUILTIN_PADDB,
24764 IX86_BUILTIN_PADDW,
24765 IX86_BUILTIN_PADDD,
24766 IX86_BUILTIN_PADDQ,
24767 IX86_BUILTIN_PADDSB,
24768 IX86_BUILTIN_PADDSW,
24769 IX86_BUILTIN_PADDUSB,
24770 IX86_BUILTIN_PADDUSW,
24771 IX86_BUILTIN_PSUBB,
24772 IX86_BUILTIN_PSUBW,
24773 IX86_BUILTIN_PSUBD,
24774 IX86_BUILTIN_PSUBQ,
24775 IX86_BUILTIN_PSUBSB,
24776 IX86_BUILTIN_PSUBSW,
24777 IX86_BUILTIN_PSUBUSB,
24778 IX86_BUILTIN_PSUBUSW,
24781 IX86_BUILTIN_PANDN,
24785 IX86_BUILTIN_PAVGB,
24786 IX86_BUILTIN_PAVGW,
24788 IX86_BUILTIN_PCMPEQB,
24789 IX86_BUILTIN_PCMPEQW,
24790 IX86_BUILTIN_PCMPEQD,
24791 IX86_BUILTIN_PCMPGTB,
24792 IX86_BUILTIN_PCMPGTW,
24793 IX86_BUILTIN_PCMPGTD,
24795 IX86_BUILTIN_PMADDWD,
24797 IX86_BUILTIN_PMAXSW,
24798 IX86_BUILTIN_PMAXUB,
24799 IX86_BUILTIN_PMINSW,
24800 IX86_BUILTIN_PMINUB,
24802 IX86_BUILTIN_PMULHUW,
24803 IX86_BUILTIN_PMULHW,
24804 IX86_BUILTIN_PMULLW,
24806 IX86_BUILTIN_PSADBW,
24807 IX86_BUILTIN_PSHUFW,
24809 IX86_BUILTIN_PSLLW,
24810 IX86_BUILTIN_PSLLD,
24811 IX86_BUILTIN_PSLLQ,
24812 IX86_BUILTIN_PSRAW,
24813 IX86_BUILTIN_PSRAD,
24814 IX86_BUILTIN_PSRLW,
24815 IX86_BUILTIN_PSRLD,
24816 IX86_BUILTIN_PSRLQ,
24817 IX86_BUILTIN_PSLLWI,
24818 IX86_BUILTIN_PSLLDI,
24819 IX86_BUILTIN_PSLLQI,
24820 IX86_BUILTIN_PSRAWI,
24821 IX86_BUILTIN_PSRADI,
24822 IX86_BUILTIN_PSRLWI,
24823 IX86_BUILTIN_PSRLDI,
24824 IX86_BUILTIN_PSRLQI,
24826 IX86_BUILTIN_PUNPCKHBW,
24827 IX86_BUILTIN_PUNPCKHWD,
24828 IX86_BUILTIN_PUNPCKHDQ,
24829 IX86_BUILTIN_PUNPCKLBW,
24830 IX86_BUILTIN_PUNPCKLWD,
24831 IX86_BUILTIN_PUNPCKLDQ,
24833 IX86_BUILTIN_SHUFPS,
24835 IX86_BUILTIN_RCPPS,
24836 IX86_BUILTIN_RCPSS,
24837 IX86_BUILTIN_RSQRTPS,
24838 IX86_BUILTIN_RSQRTPS_NR,
24839 IX86_BUILTIN_RSQRTSS,
24840 IX86_BUILTIN_RSQRTF,
24841 IX86_BUILTIN_SQRTPS,
24842 IX86_BUILTIN_SQRTPS_NR,
24843 IX86_BUILTIN_SQRTSS,
24845 IX86_BUILTIN_UNPCKHPS,
24846 IX86_BUILTIN_UNPCKLPS,
24848 IX86_BUILTIN_ANDPS,
24849 IX86_BUILTIN_ANDNPS,
24851 IX86_BUILTIN_XORPS,
24854 IX86_BUILTIN_LDMXCSR,
24855 IX86_BUILTIN_STMXCSR,
24856 IX86_BUILTIN_SFENCE,
24858 /* 3DNow! Original */
24859 IX86_BUILTIN_FEMMS,
24860 IX86_BUILTIN_PAVGUSB,
24861 IX86_BUILTIN_PF2ID,
24862 IX86_BUILTIN_PFACC,
24863 IX86_BUILTIN_PFADD,
24864 IX86_BUILTIN_PFCMPEQ,
24865 IX86_BUILTIN_PFCMPGE,
24866 IX86_BUILTIN_PFCMPGT,
24867 IX86_BUILTIN_PFMAX,
24868 IX86_BUILTIN_PFMIN,
24869 IX86_BUILTIN_PFMUL,
24870 IX86_BUILTIN_PFRCP,
24871 IX86_BUILTIN_PFRCPIT1,
24872 IX86_BUILTIN_PFRCPIT2,
24873 IX86_BUILTIN_PFRSQIT1,
24874 IX86_BUILTIN_PFRSQRT,
24875 IX86_BUILTIN_PFSUB,
24876 IX86_BUILTIN_PFSUBR,
24877 IX86_BUILTIN_PI2FD,
24878 IX86_BUILTIN_PMULHRW,
24880 /* 3DNow! Athlon Extensions */
24881 IX86_BUILTIN_PF2IW,
24882 IX86_BUILTIN_PFNACC,
24883 IX86_BUILTIN_PFPNACC,
24884 IX86_BUILTIN_PI2FW,
24885 IX86_BUILTIN_PSWAPDSI,
24886 IX86_BUILTIN_PSWAPDSF,
24888 /* SSE2 */
24889 IX86_BUILTIN_ADDPD,
24890 IX86_BUILTIN_ADDSD,
24891 IX86_BUILTIN_DIVPD,
24892 IX86_BUILTIN_DIVSD,
24893 IX86_BUILTIN_MULPD,
24894 IX86_BUILTIN_MULSD,
24895 IX86_BUILTIN_SUBPD,
24896 IX86_BUILTIN_SUBSD,
24898 IX86_BUILTIN_CMPEQPD,
24899 IX86_BUILTIN_CMPLTPD,
24900 IX86_BUILTIN_CMPLEPD,
24901 IX86_BUILTIN_CMPGTPD,
24902 IX86_BUILTIN_CMPGEPD,
24903 IX86_BUILTIN_CMPNEQPD,
24904 IX86_BUILTIN_CMPNLTPD,
24905 IX86_BUILTIN_CMPNLEPD,
24906 IX86_BUILTIN_CMPNGTPD,
24907 IX86_BUILTIN_CMPNGEPD,
24908 IX86_BUILTIN_CMPORDPD,
24909 IX86_BUILTIN_CMPUNORDPD,
24910 IX86_BUILTIN_CMPEQSD,
24911 IX86_BUILTIN_CMPLTSD,
24912 IX86_BUILTIN_CMPLESD,
24913 IX86_BUILTIN_CMPNEQSD,
24914 IX86_BUILTIN_CMPNLTSD,
24915 IX86_BUILTIN_CMPNLESD,
24916 IX86_BUILTIN_CMPORDSD,
24917 IX86_BUILTIN_CMPUNORDSD,
24919 IX86_BUILTIN_COMIEQSD,
24920 IX86_BUILTIN_COMILTSD,
24921 IX86_BUILTIN_COMILESD,
24922 IX86_BUILTIN_COMIGTSD,
24923 IX86_BUILTIN_COMIGESD,
24924 IX86_BUILTIN_COMINEQSD,
24925 IX86_BUILTIN_UCOMIEQSD,
24926 IX86_BUILTIN_UCOMILTSD,
24927 IX86_BUILTIN_UCOMILESD,
24928 IX86_BUILTIN_UCOMIGTSD,
24929 IX86_BUILTIN_UCOMIGESD,
24930 IX86_BUILTIN_UCOMINEQSD,
24932 IX86_BUILTIN_MAXPD,
24933 IX86_BUILTIN_MAXSD,
24934 IX86_BUILTIN_MINPD,
24935 IX86_BUILTIN_MINSD,
24937 IX86_BUILTIN_ANDPD,
24938 IX86_BUILTIN_ANDNPD,
24940 IX86_BUILTIN_XORPD,
24942 IX86_BUILTIN_SQRTPD,
24943 IX86_BUILTIN_SQRTSD,
24945 IX86_BUILTIN_UNPCKHPD,
24946 IX86_BUILTIN_UNPCKLPD,
24948 IX86_BUILTIN_SHUFPD,
24950 IX86_BUILTIN_LOADUPD,
24951 IX86_BUILTIN_STOREUPD,
24952 IX86_BUILTIN_MOVSD,
24954 IX86_BUILTIN_LOADHPD,
24955 IX86_BUILTIN_LOADLPD,
24957 IX86_BUILTIN_CVTDQ2PD,
24958 IX86_BUILTIN_CVTDQ2PS,
24960 IX86_BUILTIN_CVTPD2DQ,
24961 IX86_BUILTIN_CVTPD2PI,
24962 IX86_BUILTIN_CVTPD2PS,
24963 IX86_BUILTIN_CVTTPD2DQ,
24964 IX86_BUILTIN_CVTTPD2PI,
24966 IX86_BUILTIN_CVTPI2PD,
24967 IX86_BUILTIN_CVTSI2SD,
24968 IX86_BUILTIN_CVTSI642SD,
24970 IX86_BUILTIN_CVTSD2SI,
24971 IX86_BUILTIN_CVTSD2SI64,
24972 IX86_BUILTIN_CVTSD2SS,
24973 IX86_BUILTIN_CVTSS2SD,
24974 IX86_BUILTIN_CVTTSD2SI,
24975 IX86_BUILTIN_CVTTSD2SI64,
24977 IX86_BUILTIN_CVTPS2DQ,
24978 IX86_BUILTIN_CVTPS2PD,
24979 IX86_BUILTIN_CVTTPS2DQ,
24981 IX86_BUILTIN_MOVNTI,
24982 IX86_BUILTIN_MOVNTI64,
24983 IX86_BUILTIN_MOVNTPD,
24984 IX86_BUILTIN_MOVNTDQ,
24986 IX86_BUILTIN_MOVQ128,
24989 IX86_BUILTIN_MASKMOVDQU,
24990 IX86_BUILTIN_MOVMSKPD,
24991 IX86_BUILTIN_PMOVMSKB128,
24993 IX86_BUILTIN_PACKSSWB128,
24994 IX86_BUILTIN_PACKSSDW128,
24995 IX86_BUILTIN_PACKUSWB128,
24997 IX86_BUILTIN_PADDB128,
24998 IX86_BUILTIN_PADDW128,
24999 IX86_BUILTIN_PADDD128,
25000 IX86_BUILTIN_PADDQ128,
25001 IX86_BUILTIN_PADDSB128,
25002 IX86_BUILTIN_PADDSW128,
25003 IX86_BUILTIN_PADDUSB128,
25004 IX86_BUILTIN_PADDUSW128,
25005 IX86_BUILTIN_PSUBB128,
25006 IX86_BUILTIN_PSUBW128,
25007 IX86_BUILTIN_PSUBD128,
25008 IX86_BUILTIN_PSUBQ128,
25009 IX86_BUILTIN_PSUBSB128,
25010 IX86_BUILTIN_PSUBSW128,
25011 IX86_BUILTIN_PSUBUSB128,
25012 IX86_BUILTIN_PSUBUSW128,
25014 IX86_BUILTIN_PAND128,
25015 IX86_BUILTIN_PANDN128,
25016 IX86_BUILTIN_POR128,
25017 IX86_BUILTIN_PXOR128,
25019 IX86_BUILTIN_PAVGB128,
25020 IX86_BUILTIN_PAVGW128,
25022 IX86_BUILTIN_PCMPEQB128,
25023 IX86_BUILTIN_PCMPEQW128,
25024 IX86_BUILTIN_PCMPEQD128,
25025 IX86_BUILTIN_PCMPGTB128,
25026 IX86_BUILTIN_PCMPGTW128,
25027 IX86_BUILTIN_PCMPGTD128,
25029 IX86_BUILTIN_PMADDWD128,
25031 IX86_BUILTIN_PMAXSW128,
25032 IX86_BUILTIN_PMAXUB128,
25033 IX86_BUILTIN_PMINSW128,
25034 IX86_BUILTIN_PMINUB128,
25036 IX86_BUILTIN_PMULUDQ,
25037 IX86_BUILTIN_PMULUDQ128,
25038 IX86_BUILTIN_PMULHUW128,
25039 IX86_BUILTIN_PMULHW128,
25040 IX86_BUILTIN_PMULLW128,
25042 IX86_BUILTIN_PSADBW128,
25043 IX86_BUILTIN_PSHUFHW,
25044 IX86_BUILTIN_PSHUFLW,
25045 IX86_BUILTIN_PSHUFD,
25047 IX86_BUILTIN_PSLLDQI128,
25048 IX86_BUILTIN_PSLLWI128,
25049 IX86_BUILTIN_PSLLDI128,
25050 IX86_BUILTIN_PSLLQI128,
25051 IX86_BUILTIN_PSRAWI128,
25052 IX86_BUILTIN_PSRADI128,
25053 IX86_BUILTIN_PSRLDQI128,
25054 IX86_BUILTIN_PSRLWI128,
25055 IX86_BUILTIN_PSRLDI128,
25056 IX86_BUILTIN_PSRLQI128,
25058 IX86_BUILTIN_PSLLDQ128,
25059 IX86_BUILTIN_PSLLW128,
25060 IX86_BUILTIN_PSLLD128,
25061 IX86_BUILTIN_PSLLQ128,
25062 IX86_BUILTIN_PSRAW128,
25063 IX86_BUILTIN_PSRAD128,
25064 IX86_BUILTIN_PSRLW128,
25065 IX86_BUILTIN_PSRLD128,
25066 IX86_BUILTIN_PSRLQ128,
25068 IX86_BUILTIN_PUNPCKHBW128,
25069 IX86_BUILTIN_PUNPCKHWD128,
25070 IX86_BUILTIN_PUNPCKHDQ128,
25071 IX86_BUILTIN_PUNPCKHQDQ128,
25072 IX86_BUILTIN_PUNPCKLBW128,
25073 IX86_BUILTIN_PUNPCKLWD128,
25074 IX86_BUILTIN_PUNPCKLDQ128,
25075 IX86_BUILTIN_PUNPCKLQDQ128,
25077 IX86_BUILTIN_CLFLUSH,
25078 IX86_BUILTIN_MFENCE,
25079 IX86_BUILTIN_LFENCE,
25080 IX86_BUILTIN_PAUSE,
25082 IX86_BUILTIN_BSRSI,
25083 IX86_BUILTIN_BSRDI,
25084 IX86_BUILTIN_RDPMC,
25085 IX86_BUILTIN_RDTSC,
25086 IX86_BUILTIN_RDTSCP,
25087 IX86_BUILTIN_ROLQI,
25088 IX86_BUILTIN_ROLHI,
25089 IX86_BUILTIN_RORQI,
25090 IX86_BUILTIN_RORHI,
25092 /* SSE3 */
25093 IX86_BUILTIN_ADDSUBPS,
25094 IX86_BUILTIN_HADDPS,
25095 IX86_BUILTIN_HSUBPS,
25096 IX86_BUILTIN_MOVSHDUP,
25097 IX86_BUILTIN_MOVSLDUP,
25098 IX86_BUILTIN_ADDSUBPD,
25099 IX86_BUILTIN_HADDPD,
25100 IX86_BUILTIN_HSUBPD,
25101 IX86_BUILTIN_LDDQU,
25103 IX86_BUILTIN_MONITOR,
25104 IX86_BUILTIN_MWAIT,
25106 /* SSSE3 */
25107 IX86_BUILTIN_PHADDW,
25108 IX86_BUILTIN_PHADDD,
25109 IX86_BUILTIN_PHADDSW,
25110 IX86_BUILTIN_PHSUBW,
25111 IX86_BUILTIN_PHSUBD,
25112 IX86_BUILTIN_PHSUBSW,
25113 IX86_BUILTIN_PMADDUBSW,
25114 IX86_BUILTIN_PMULHRSW,
25115 IX86_BUILTIN_PSHUFB,
25116 IX86_BUILTIN_PSIGNB,
25117 IX86_BUILTIN_PSIGNW,
25118 IX86_BUILTIN_PSIGND,
25119 IX86_BUILTIN_PALIGNR,
25120 IX86_BUILTIN_PABSB,
25121 IX86_BUILTIN_PABSW,
25122 IX86_BUILTIN_PABSD,
25124 IX86_BUILTIN_PHADDW128,
25125 IX86_BUILTIN_PHADDD128,
25126 IX86_BUILTIN_PHADDSW128,
25127 IX86_BUILTIN_PHSUBW128,
25128 IX86_BUILTIN_PHSUBD128,
25129 IX86_BUILTIN_PHSUBSW128,
25130 IX86_BUILTIN_PMADDUBSW128,
25131 IX86_BUILTIN_PMULHRSW128,
25132 IX86_BUILTIN_PSHUFB128,
25133 IX86_BUILTIN_PSIGNB128,
25134 IX86_BUILTIN_PSIGNW128,
25135 IX86_BUILTIN_PSIGND128,
25136 IX86_BUILTIN_PALIGNR128,
25137 IX86_BUILTIN_PABSB128,
25138 IX86_BUILTIN_PABSW128,
25139 IX86_BUILTIN_PABSD128,
25141 /* AMDFAM10 - SSE4A New Instructions. */
25142 IX86_BUILTIN_MOVNTSD,
25143 IX86_BUILTIN_MOVNTSS,
25144 IX86_BUILTIN_EXTRQI,
25145 IX86_BUILTIN_EXTRQ,
25146 IX86_BUILTIN_INSERTQI,
25147 IX86_BUILTIN_INSERTQ,
25149 /* SSE4.1 */
25150 IX86_BUILTIN_BLENDPD,
25151 IX86_BUILTIN_BLENDPS,
25152 IX86_BUILTIN_BLENDVPD,
25153 IX86_BUILTIN_BLENDVPS,
25154 IX86_BUILTIN_PBLENDVB128,
25155 IX86_BUILTIN_PBLENDW128,
25160 IX86_BUILTIN_INSERTPS128,
25162 IX86_BUILTIN_MOVNTDQA,
25163 IX86_BUILTIN_MPSADBW128,
25164 IX86_BUILTIN_PACKUSDW128,
25165 IX86_BUILTIN_PCMPEQQ,
25166 IX86_BUILTIN_PHMINPOSUW128,
25168 IX86_BUILTIN_PMAXSB128,
25169 IX86_BUILTIN_PMAXSD128,
25170 IX86_BUILTIN_PMAXUD128,
25171 IX86_BUILTIN_PMAXUW128,
25173 IX86_BUILTIN_PMINSB128,
25174 IX86_BUILTIN_PMINSD128,
25175 IX86_BUILTIN_PMINUD128,
25176 IX86_BUILTIN_PMINUW128,
25178 IX86_BUILTIN_PMOVSXBW128,
25179 IX86_BUILTIN_PMOVSXBD128,
25180 IX86_BUILTIN_PMOVSXBQ128,
25181 IX86_BUILTIN_PMOVSXWD128,
25182 IX86_BUILTIN_PMOVSXWQ128,
25183 IX86_BUILTIN_PMOVSXDQ128,
25185 IX86_BUILTIN_PMOVZXBW128,
25186 IX86_BUILTIN_PMOVZXBD128,
25187 IX86_BUILTIN_PMOVZXBQ128,
25188 IX86_BUILTIN_PMOVZXWD128,
25189 IX86_BUILTIN_PMOVZXWQ128,
25190 IX86_BUILTIN_PMOVZXDQ128,
25192 IX86_BUILTIN_PMULDQ128,
25193 IX86_BUILTIN_PMULLD128,
25195 IX86_BUILTIN_ROUNDSD,
25196 IX86_BUILTIN_ROUNDSS,
25198 IX86_BUILTIN_ROUNDPD,
25199 IX86_BUILTIN_ROUNDPS,
25201 IX86_BUILTIN_FLOORPD,
25202 IX86_BUILTIN_CEILPD,
25203 IX86_BUILTIN_TRUNCPD,
25204 IX86_BUILTIN_RINTPD,
25205 IX86_BUILTIN_ROUNDPD_AZ,
25207 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
25208 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
25209 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
25211 IX86_BUILTIN_FLOORPS,
25212 IX86_BUILTIN_CEILPS,
25213 IX86_BUILTIN_TRUNCPS,
25214 IX86_BUILTIN_RINTPS,
25215 IX86_BUILTIN_ROUNDPS_AZ,
25217 IX86_BUILTIN_FLOORPS_SFIX,
25218 IX86_BUILTIN_CEILPS_SFIX,
25219 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
25221 IX86_BUILTIN_PTESTZ,
25222 IX86_BUILTIN_PTESTC,
25223 IX86_BUILTIN_PTESTNZC,
25225 IX86_BUILTIN_VEC_INIT_V2SI,
25226 IX86_BUILTIN_VEC_INIT_V4HI,
25227 IX86_BUILTIN_VEC_INIT_V8QI,
25228 IX86_BUILTIN_VEC_EXT_V2DF,
25229 IX86_BUILTIN_VEC_EXT_V2DI,
25230 IX86_BUILTIN_VEC_EXT_V4SF,
25231 IX86_BUILTIN_VEC_EXT_V4SI,
25232 IX86_BUILTIN_VEC_EXT_V8HI,
25233 IX86_BUILTIN_VEC_EXT_V2SI,
25234 IX86_BUILTIN_VEC_EXT_V4HI,
25235 IX86_BUILTIN_VEC_EXT_V16QI,
25236 IX86_BUILTIN_VEC_SET_V2DI,
25237 IX86_BUILTIN_VEC_SET_V4SF,
25238 IX86_BUILTIN_VEC_SET_V4SI,
25239 IX86_BUILTIN_VEC_SET_V8HI,
25240 IX86_BUILTIN_VEC_SET_V4HI,
25241 IX86_BUILTIN_VEC_SET_V16QI,
25243 IX86_BUILTIN_VEC_PACK_SFIX,
25244 IX86_BUILTIN_VEC_PACK_SFIX256,
25246 /* SSE4.2 */
25247 IX86_BUILTIN_CRC32QI,
25248 IX86_BUILTIN_CRC32HI,
25249 IX86_BUILTIN_CRC32SI,
25250 IX86_BUILTIN_CRC32DI,
25252 IX86_BUILTIN_PCMPESTRI128,
25253 IX86_BUILTIN_PCMPESTRM128,
25254 IX86_BUILTIN_PCMPESTRA128,
25255 IX86_BUILTIN_PCMPESTRC128,
25256 IX86_BUILTIN_PCMPESTRO128,
25257 IX86_BUILTIN_PCMPESTRS128,
25258 IX86_BUILTIN_PCMPESTRZ128,
25259 IX86_BUILTIN_PCMPISTRI128,
25260 IX86_BUILTIN_PCMPISTRM128,
25261 IX86_BUILTIN_PCMPISTRA128,
25262 IX86_BUILTIN_PCMPISTRC128,
25263 IX86_BUILTIN_PCMPISTRO128,
25264 IX86_BUILTIN_PCMPISTRS128,
25265 IX86_BUILTIN_PCMPISTRZ128,
25267 IX86_BUILTIN_PCMPGTQ,
25269 /* AES instructions */
25270 IX86_BUILTIN_AESENC128,
25271 IX86_BUILTIN_AESENCLAST128,
25272 IX86_BUILTIN_AESDEC128,
25273 IX86_BUILTIN_AESDECLAST128,
25274 IX86_BUILTIN_AESIMC128,
25275 IX86_BUILTIN_AESKEYGENASSIST128,
25277 /* PCLMUL instruction */
25278 IX86_BUILTIN_PCLMULQDQ128,
25280 /* AVX */
25281 IX86_BUILTIN_ADDPD256,
25282 IX86_BUILTIN_ADDPS256,
25283 IX86_BUILTIN_ADDSUBPD256,
25284 IX86_BUILTIN_ADDSUBPS256,
25285 IX86_BUILTIN_ANDPD256,
25286 IX86_BUILTIN_ANDPS256,
25287 IX86_BUILTIN_ANDNPD256,
25288 IX86_BUILTIN_ANDNPS256,
25289 IX86_BUILTIN_BLENDPD256,
25290 IX86_BUILTIN_BLENDPS256,
25291 IX86_BUILTIN_BLENDVPD256,
25292 IX86_BUILTIN_BLENDVPS256,
25293 IX86_BUILTIN_DIVPD256,
25294 IX86_BUILTIN_DIVPS256,
25295 IX86_BUILTIN_DPPS256,
25296 IX86_BUILTIN_HADDPD256,
25297 IX86_BUILTIN_HADDPS256,
25298 IX86_BUILTIN_HSUBPD256,
25299 IX86_BUILTIN_HSUBPS256,
25300 IX86_BUILTIN_MAXPD256,
25301 IX86_BUILTIN_MAXPS256,
25302 IX86_BUILTIN_MINPD256,
25303 IX86_BUILTIN_MINPS256,
25304 IX86_BUILTIN_MULPD256,
25305 IX86_BUILTIN_MULPS256,
25306 IX86_BUILTIN_ORPD256,
25307 IX86_BUILTIN_ORPS256,
25308 IX86_BUILTIN_SHUFPD256,
25309 IX86_BUILTIN_SHUFPS256,
25310 IX86_BUILTIN_SUBPD256,
25311 IX86_BUILTIN_SUBPS256,
25312 IX86_BUILTIN_XORPD256,
25313 IX86_BUILTIN_XORPS256,
25314 IX86_BUILTIN_CMPSD,
25315 IX86_BUILTIN_CMPSS,
25316 IX86_BUILTIN_CMPPD,
25317 IX86_BUILTIN_CMPPS,
25318 IX86_BUILTIN_CMPPD256,
25319 IX86_BUILTIN_CMPPS256,
25320 IX86_BUILTIN_CVTDQ2PD256,
25321 IX86_BUILTIN_CVTDQ2PS256,
25322 IX86_BUILTIN_CVTPD2PS256,
25323 IX86_BUILTIN_CVTPS2DQ256,
25324 IX86_BUILTIN_CVTPS2PD256,
25325 IX86_BUILTIN_CVTTPD2DQ256,
25326 IX86_BUILTIN_CVTPD2DQ256,
25327 IX86_BUILTIN_CVTTPS2DQ256,
25328 IX86_BUILTIN_EXTRACTF128PD256,
25329 IX86_BUILTIN_EXTRACTF128PS256,
25330 IX86_BUILTIN_EXTRACTF128SI256,
25331 IX86_BUILTIN_VZEROALL,
25332 IX86_BUILTIN_VZEROUPPER,
25333 IX86_BUILTIN_VPERMILVARPD,
25334 IX86_BUILTIN_VPERMILVARPS,
25335 IX86_BUILTIN_VPERMILVARPD256,
25336 IX86_BUILTIN_VPERMILVARPS256,
25337 IX86_BUILTIN_VPERMILPD,
25338 IX86_BUILTIN_VPERMILPS,
25339 IX86_BUILTIN_VPERMILPD256,
25340 IX86_BUILTIN_VPERMILPS256,
25341 IX86_BUILTIN_VPERMIL2PD,
25342 IX86_BUILTIN_VPERMIL2PS,
25343 IX86_BUILTIN_VPERMIL2PD256,
25344 IX86_BUILTIN_VPERMIL2PS256,
25345 IX86_BUILTIN_VPERM2F128PD256,
25346 IX86_BUILTIN_VPERM2F128PS256,
25347 IX86_BUILTIN_VPERM2F128SI256,
25348 IX86_BUILTIN_VBROADCASTSS,
25349 IX86_BUILTIN_VBROADCASTSD256,
25350 IX86_BUILTIN_VBROADCASTSS256,
25351 IX86_BUILTIN_VBROADCASTPD256,
25352 IX86_BUILTIN_VBROADCASTPS256,
25353 IX86_BUILTIN_VINSERTF128PD256,
25354 IX86_BUILTIN_VINSERTF128PS256,
25355 IX86_BUILTIN_VINSERTF128SI256,
25356 IX86_BUILTIN_LOADUPD256,
25357 IX86_BUILTIN_LOADUPS256,
25358 IX86_BUILTIN_STOREUPD256,
25359 IX86_BUILTIN_STOREUPS256,
25360 IX86_BUILTIN_LDDQU256,
25361 IX86_BUILTIN_MOVNTDQ256,
25362 IX86_BUILTIN_MOVNTPD256,
25363 IX86_BUILTIN_MOVNTPS256,
25364 IX86_BUILTIN_LOADDQU256,
25365 IX86_BUILTIN_STOREDQU256,
25366 IX86_BUILTIN_MASKLOADPD,
25367 IX86_BUILTIN_MASKLOADPS,
25368 IX86_BUILTIN_MASKSTOREPD,
25369 IX86_BUILTIN_MASKSTOREPS,
25370 IX86_BUILTIN_MASKLOADPD256,
25371 IX86_BUILTIN_MASKLOADPS256,
25372 IX86_BUILTIN_MASKSTOREPD256,
25373 IX86_BUILTIN_MASKSTOREPS256,
25374 IX86_BUILTIN_MOVSHDUP256,
25375 IX86_BUILTIN_MOVSLDUP256,
25376 IX86_BUILTIN_MOVDDUP256,
25378 IX86_BUILTIN_SQRTPD256,
25379 IX86_BUILTIN_SQRTPS256,
25380 IX86_BUILTIN_SQRTPS_NR256,
25381 IX86_BUILTIN_RSQRTPS256,
25382 IX86_BUILTIN_RSQRTPS_NR256,
25384 IX86_BUILTIN_RCPPS256,
25386 IX86_BUILTIN_ROUNDPD256,
25387 IX86_BUILTIN_ROUNDPS256,
25389 IX86_BUILTIN_FLOORPD256,
25390 IX86_BUILTIN_CEILPD256,
25391 IX86_BUILTIN_TRUNCPD256,
25392 IX86_BUILTIN_RINTPD256,
25393 IX86_BUILTIN_ROUNDPD_AZ256,
25395 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
25396 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
25397 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
25399 IX86_BUILTIN_FLOORPS256,
25400 IX86_BUILTIN_CEILPS256,
25401 IX86_BUILTIN_TRUNCPS256,
25402 IX86_BUILTIN_RINTPS256,
25403 IX86_BUILTIN_ROUNDPS_AZ256,
25405 IX86_BUILTIN_FLOORPS_SFIX256,
25406 IX86_BUILTIN_CEILPS_SFIX256,
25407 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
25409 IX86_BUILTIN_UNPCKHPD256,
25410 IX86_BUILTIN_UNPCKLPD256,
25411 IX86_BUILTIN_UNPCKHPS256,
25412 IX86_BUILTIN_UNPCKLPS256,
25414 IX86_BUILTIN_SI256_SI,
25415 IX86_BUILTIN_PS256_PS,
25416 IX86_BUILTIN_PD256_PD,
25417 IX86_BUILTIN_SI_SI256,
25418 IX86_BUILTIN_PS_PS256,
25419 IX86_BUILTIN_PD_PD256,
25421 IX86_BUILTIN_VTESTZPD,
25422 IX86_BUILTIN_VTESTCPD,
25423 IX86_BUILTIN_VTESTNZCPD,
25424 IX86_BUILTIN_VTESTZPS,
25425 IX86_BUILTIN_VTESTCPS,
25426 IX86_BUILTIN_VTESTNZCPS,
25427 IX86_BUILTIN_VTESTZPD256,
25428 IX86_BUILTIN_VTESTCPD256,
25429 IX86_BUILTIN_VTESTNZCPD256,
25430 IX86_BUILTIN_VTESTZPS256,
25431 IX86_BUILTIN_VTESTCPS256,
25432 IX86_BUILTIN_VTESTNZCPS256,
25433 IX86_BUILTIN_PTESTZ256,
25434 IX86_BUILTIN_PTESTC256,
25435 IX86_BUILTIN_PTESTNZC256,
25437 IX86_BUILTIN_MOVMSKPD256,
25438 IX86_BUILTIN_MOVMSKPS256,
25440 /* AVX2 */
25441 IX86_BUILTIN_MPSADBW256,
25442 IX86_BUILTIN_PABSB256,
25443 IX86_BUILTIN_PABSW256,
25444 IX86_BUILTIN_PABSD256,
25445 IX86_BUILTIN_PACKSSDW256,
25446 IX86_BUILTIN_PACKSSWB256,
25447 IX86_BUILTIN_PACKUSDW256,
25448 IX86_BUILTIN_PACKUSWB256,
25449 IX86_BUILTIN_PADDB256,
25450 IX86_BUILTIN_PADDW256,
25451 IX86_BUILTIN_PADDD256,
25452 IX86_BUILTIN_PADDQ256,
25453 IX86_BUILTIN_PADDSB256,
25454 IX86_BUILTIN_PADDSW256,
25455 IX86_BUILTIN_PADDUSB256,
25456 IX86_BUILTIN_PADDUSW256,
25457 IX86_BUILTIN_PALIGNR256,
25458 IX86_BUILTIN_AND256I,
25459 IX86_BUILTIN_ANDNOT256I,
25460 IX86_BUILTIN_PAVGB256,
25461 IX86_BUILTIN_PAVGW256,
25462 IX86_BUILTIN_PBLENDVB256,
25463 IX86_BUILTIN_PBLENDVW256,
25464 IX86_BUILTIN_PCMPEQB256,
25465 IX86_BUILTIN_PCMPEQW256,
25466 IX86_BUILTIN_PCMPEQD256,
25467 IX86_BUILTIN_PCMPEQQ256,
25468 IX86_BUILTIN_PCMPGTB256,
25469 IX86_BUILTIN_PCMPGTW256,
25470 IX86_BUILTIN_PCMPGTD256,
25471 IX86_BUILTIN_PCMPGTQ256,
25472 IX86_BUILTIN_PHADDW256,
25473 IX86_BUILTIN_PHADDD256,
25474 IX86_BUILTIN_PHADDSW256,
25475 IX86_BUILTIN_PHSUBW256,
25476 IX86_BUILTIN_PHSUBD256,
25477 IX86_BUILTIN_PHSUBSW256,
25478 IX86_BUILTIN_PMADDUBSW256,
25479 IX86_BUILTIN_PMADDWD256,
25480 IX86_BUILTIN_PMAXSB256,
25481 IX86_BUILTIN_PMAXSW256,
25482 IX86_BUILTIN_PMAXSD256,
25483 IX86_BUILTIN_PMAXUB256,
25484 IX86_BUILTIN_PMAXUW256,
25485 IX86_BUILTIN_PMAXUD256,
25486 IX86_BUILTIN_PMINSB256,
25487 IX86_BUILTIN_PMINSW256,
25488 IX86_BUILTIN_PMINSD256,
25489 IX86_BUILTIN_PMINUB256,
25490 IX86_BUILTIN_PMINUW256,
25491 IX86_BUILTIN_PMINUD256,
25492 IX86_BUILTIN_PMOVMSKB256,
25493 IX86_BUILTIN_PMOVSXBW256,
25494 IX86_BUILTIN_PMOVSXBD256,
25495 IX86_BUILTIN_PMOVSXBQ256,
25496 IX86_BUILTIN_PMOVSXWD256,
25497 IX86_BUILTIN_PMOVSXWQ256,
25498 IX86_BUILTIN_PMOVSXDQ256,
25499 IX86_BUILTIN_PMOVZXBW256,
25500 IX86_BUILTIN_PMOVZXBD256,
25501 IX86_BUILTIN_PMOVZXBQ256,
25502 IX86_BUILTIN_PMOVZXWD256,
25503 IX86_BUILTIN_PMOVZXWQ256,
25504 IX86_BUILTIN_PMOVZXDQ256,
25505 IX86_BUILTIN_PMULDQ256,
25506 IX86_BUILTIN_PMULHRSW256,
25507 IX86_BUILTIN_PMULHUW256,
25508 IX86_BUILTIN_PMULHW256,
25509 IX86_BUILTIN_PMULLW256,
25510 IX86_BUILTIN_PMULLD256,
25511 IX86_BUILTIN_PMULUDQ256,
25512 IX86_BUILTIN_POR256,
25513 IX86_BUILTIN_PSADBW256,
25514 IX86_BUILTIN_PSHUFB256,
25515 IX86_BUILTIN_PSHUFD256,
25516 IX86_BUILTIN_PSHUFHW256,
25517 IX86_BUILTIN_PSHUFLW256,
25518 IX86_BUILTIN_PSIGNB256,
25519 IX86_BUILTIN_PSIGNW256,
25520 IX86_BUILTIN_PSIGND256,
25521 IX86_BUILTIN_PSLLDQI256,
25522 IX86_BUILTIN_PSLLWI256,
25523 IX86_BUILTIN_PSLLW256,
25524 IX86_BUILTIN_PSLLDI256,
25525 IX86_BUILTIN_PSLLD256,
25526 IX86_BUILTIN_PSLLQI256,
25527 IX86_BUILTIN_PSLLQ256,
25528 IX86_BUILTIN_PSRAWI256,
25529 IX86_BUILTIN_PSRAW256,
25530 IX86_BUILTIN_PSRADI256,
25531 IX86_BUILTIN_PSRAD256,
25532 IX86_BUILTIN_PSRLDQI256,
25533 IX86_BUILTIN_PSRLWI256,
25534 IX86_BUILTIN_PSRLW256,
25535 IX86_BUILTIN_PSRLDI256,
25536 IX86_BUILTIN_PSRLD256,
25537 IX86_BUILTIN_PSRLQI256,
25538 IX86_BUILTIN_PSRLQ256,
25539 IX86_BUILTIN_PSUBB256,
25540 IX86_BUILTIN_PSUBW256,
25541 IX86_BUILTIN_PSUBD256,
25542 IX86_BUILTIN_PSUBQ256,
25543 IX86_BUILTIN_PSUBSB256,
25544 IX86_BUILTIN_PSUBSW256,
25545 IX86_BUILTIN_PSUBUSB256,
25546 IX86_BUILTIN_PSUBUSW256,
25547 IX86_BUILTIN_PUNPCKHBW256,
25548 IX86_BUILTIN_PUNPCKHWD256,
25549 IX86_BUILTIN_PUNPCKHDQ256,
25550 IX86_BUILTIN_PUNPCKHQDQ256,
25551 IX86_BUILTIN_PUNPCKLBW256,
25552 IX86_BUILTIN_PUNPCKLWD256,
25553 IX86_BUILTIN_PUNPCKLDQ256,
25554 IX86_BUILTIN_PUNPCKLQDQ256,
25555 IX86_BUILTIN_PXOR256,
25556 IX86_BUILTIN_MOVNTDQA256,
25557 IX86_BUILTIN_VBROADCASTSS_PS,
25558 IX86_BUILTIN_VBROADCASTSS_PS256,
25559 IX86_BUILTIN_VBROADCASTSD_PD256,
25560 IX86_BUILTIN_VBROADCASTSI256,
25561 IX86_BUILTIN_PBLENDD256,
25562 IX86_BUILTIN_PBLENDD128,
25563 IX86_BUILTIN_PBROADCASTB256,
25564 IX86_BUILTIN_PBROADCASTW256,
25565 IX86_BUILTIN_PBROADCASTD256,
25566 IX86_BUILTIN_PBROADCASTQ256,
25567 IX86_BUILTIN_PBROADCASTB128,
25568 IX86_BUILTIN_PBROADCASTW128,
25569 IX86_BUILTIN_PBROADCASTD128,
25570 IX86_BUILTIN_PBROADCASTQ128,
25571 IX86_BUILTIN_VPERMVARSI256,
25572 IX86_BUILTIN_VPERMDF256,
25573 IX86_BUILTIN_VPERMVARSF256,
25574 IX86_BUILTIN_VPERMDI256,
25575 IX86_BUILTIN_VPERMTI256,
25576 IX86_BUILTIN_VEXTRACT128I256,
25577 IX86_BUILTIN_VINSERT128I256,
25578 IX86_BUILTIN_MASKLOADD,
25579 IX86_BUILTIN_MASKLOADQ,
25580 IX86_BUILTIN_MASKLOADD256,
25581 IX86_BUILTIN_MASKLOADQ256,
25582 IX86_BUILTIN_MASKSTORED,
25583 IX86_BUILTIN_MASKSTOREQ,
25584 IX86_BUILTIN_MASKSTORED256,
25585 IX86_BUILTIN_MASKSTOREQ256,
25586 IX86_BUILTIN_PSLLVV4DI,
25587 IX86_BUILTIN_PSLLVV2DI,
25588 IX86_BUILTIN_PSLLVV8SI,
25589 IX86_BUILTIN_PSLLVV4SI,
25590 IX86_BUILTIN_PSRAVV8SI,
25591 IX86_BUILTIN_PSRAVV4SI,
25592 IX86_BUILTIN_PSRLVV4DI,
25593 IX86_BUILTIN_PSRLVV2DI,
25594 IX86_BUILTIN_PSRLVV8SI,
25595 IX86_BUILTIN_PSRLVV4SI,
25597 IX86_BUILTIN_GATHERSIV2DF,
25598 IX86_BUILTIN_GATHERSIV4DF,
25599 IX86_BUILTIN_GATHERDIV2DF,
25600 IX86_BUILTIN_GATHERDIV4DF,
25601 IX86_BUILTIN_GATHERSIV4SF,
25602 IX86_BUILTIN_GATHERSIV8SF,
25603 IX86_BUILTIN_GATHERDIV4SF,
25604 IX86_BUILTIN_GATHERDIV8SF,
25605 IX86_BUILTIN_GATHERSIV2DI,
25606 IX86_BUILTIN_GATHERSIV4DI,
25607 IX86_BUILTIN_GATHERDIV2DI,
25608 IX86_BUILTIN_GATHERDIV4DI,
25609 IX86_BUILTIN_GATHERSIV4SI,
25610 IX86_BUILTIN_GATHERSIV8SI,
25611 IX86_BUILTIN_GATHERDIV4SI,
25612 IX86_BUILTIN_GATHERDIV8SI,
25614 /* Alternate 4 element gather for the vectorizer where
25615 all operands are 32-byte wide. */
25616 IX86_BUILTIN_GATHERALTSIV4DF,
25617 IX86_BUILTIN_GATHERALTDIV8SF,
25618 IX86_BUILTIN_GATHERALTSIV4DI,
25619 IX86_BUILTIN_GATHERALTDIV8SI,
25621 /* TFmode support builtins. */
25623 IX86_BUILTIN_HUGE_VALQ,
25624 IX86_BUILTIN_FABSQ,
25625 IX86_BUILTIN_COPYSIGNQ,
25627 /* Vectorizer support builtins. */
25628 IX86_BUILTIN_CPYSGNPS,
25629 IX86_BUILTIN_CPYSGNPD,
25630 IX86_BUILTIN_CPYSGNPS256,
25631 IX86_BUILTIN_CPYSGNPD256,
25633 /* FMA4 instructions. */
25634 IX86_BUILTIN_VFMADDSS,
25635 IX86_BUILTIN_VFMADDSD,
25636 IX86_BUILTIN_VFMADDPS,
25637 IX86_BUILTIN_VFMADDPD,
25638 IX86_BUILTIN_VFMADDPS256,
25639 IX86_BUILTIN_VFMADDPD256,
25640 IX86_BUILTIN_VFMADDSUBPS,
25641 IX86_BUILTIN_VFMADDSUBPD,
25642 IX86_BUILTIN_VFMADDSUBPS256,
25643 IX86_BUILTIN_VFMADDSUBPD256,
25645 /* FMA3 instructions. */
25646 IX86_BUILTIN_VFMADDSS3,
25647 IX86_BUILTIN_VFMADDSD3,
25649 /* XOP instructions. */
25650 IX86_BUILTIN_VPCMOV,
25651 IX86_BUILTIN_VPCMOV_V2DI,
25652 IX86_BUILTIN_VPCMOV_V4SI,
25653 IX86_BUILTIN_VPCMOV_V8HI,
25654 IX86_BUILTIN_VPCMOV_V16QI,
25655 IX86_BUILTIN_VPCMOV_V4SF,
25656 IX86_BUILTIN_VPCMOV_V2DF,
25657 IX86_BUILTIN_VPCMOV256,
25658 IX86_BUILTIN_VPCMOV_V4DI256,
25659 IX86_BUILTIN_VPCMOV_V8SI256,
25660 IX86_BUILTIN_VPCMOV_V16HI256,
25661 IX86_BUILTIN_VPCMOV_V32QI256,
25662 IX86_BUILTIN_VPCMOV_V8SF256,
25663 IX86_BUILTIN_VPCMOV_V4DF256,
25665 IX86_BUILTIN_VPPERM,
25667 IX86_BUILTIN_VPMACSSWW,
25668 IX86_BUILTIN_VPMACSWW,
25669 IX86_BUILTIN_VPMACSSWD,
25670 IX86_BUILTIN_VPMACSWD,
25671 IX86_BUILTIN_VPMACSSDD,
25672 IX86_BUILTIN_VPMACSDD,
25673 IX86_BUILTIN_VPMACSSDQL,
25674 IX86_BUILTIN_VPMACSSDQH,
25675 IX86_BUILTIN_VPMACSDQL,
25676 IX86_BUILTIN_VPMACSDQH,
25677 IX86_BUILTIN_VPMADCSSWD,
25678 IX86_BUILTIN_VPMADCSWD,
25680 IX86_BUILTIN_VPHADDBW,
25681 IX86_BUILTIN_VPHADDBD,
25682 IX86_BUILTIN_VPHADDBQ,
25683 IX86_BUILTIN_VPHADDWD,
25684 IX86_BUILTIN_VPHADDWQ,
25685 IX86_BUILTIN_VPHADDDQ,
25686 IX86_BUILTIN_VPHADDUBW,
25687 IX86_BUILTIN_VPHADDUBD,
25688 IX86_BUILTIN_VPHADDUBQ,
25689 IX86_BUILTIN_VPHADDUWD,
25690 IX86_BUILTIN_VPHADDUWQ,
25691 IX86_BUILTIN_VPHADDUDQ,
25692 IX86_BUILTIN_VPHSUBBW,
25693 IX86_BUILTIN_VPHSUBWD,
25694 IX86_BUILTIN_VPHSUBDQ,
25696 IX86_BUILTIN_VPROTB,
25697 IX86_BUILTIN_VPROTW,
25698 IX86_BUILTIN_VPROTD,
25699 IX86_BUILTIN_VPROTQ,
25700 IX86_BUILTIN_VPROTB_IMM,
25701 IX86_BUILTIN_VPROTW_IMM,
25702 IX86_BUILTIN_VPROTD_IMM,
25703 IX86_BUILTIN_VPROTQ_IMM,
25705 IX86_BUILTIN_VPSHLB,
25706 IX86_BUILTIN_VPSHLW,
25707 IX86_BUILTIN_VPSHLD,
25708 IX86_BUILTIN_VPSHLQ,
25709 IX86_BUILTIN_VPSHAB,
25710 IX86_BUILTIN_VPSHAW,
25711 IX86_BUILTIN_VPSHAD,
25712 IX86_BUILTIN_VPSHAQ,
25714 IX86_BUILTIN_VFRCZSS,
25715 IX86_BUILTIN_VFRCZSD,
25716 IX86_BUILTIN_VFRCZPS,
25717 IX86_BUILTIN_VFRCZPD,
25718 IX86_BUILTIN_VFRCZPS256,
25719 IX86_BUILTIN_VFRCZPD256,
25721 IX86_BUILTIN_VPCOMEQUB,
25722 IX86_BUILTIN_VPCOMNEUB,
25723 IX86_BUILTIN_VPCOMLTUB,
25724 IX86_BUILTIN_VPCOMLEUB,
25725 IX86_BUILTIN_VPCOMGTUB,
25726 IX86_BUILTIN_VPCOMGEUB,
25727 IX86_BUILTIN_VPCOMFALSEUB,
25728 IX86_BUILTIN_VPCOMTRUEUB,
25730 IX86_BUILTIN_VPCOMEQUW,
25731 IX86_BUILTIN_VPCOMNEUW,
25732 IX86_BUILTIN_VPCOMLTUW,
25733 IX86_BUILTIN_VPCOMLEUW,
25734 IX86_BUILTIN_VPCOMGTUW,
25735 IX86_BUILTIN_VPCOMGEUW,
25736 IX86_BUILTIN_VPCOMFALSEUW,
25737 IX86_BUILTIN_VPCOMTRUEUW,
25739 IX86_BUILTIN_VPCOMEQUD,
25740 IX86_BUILTIN_VPCOMNEUD,
25741 IX86_BUILTIN_VPCOMLTUD,
25742 IX86_BUILTIN_VPCOMLEUD,
25743 IX86_BUILTIN_VPCOMGTUD,
25744 IX86_BUILTIN_VPCOMGEUD,
25745 IX86_BUILTIN_VPCOMFALSEUD,
25746 IX86_BUILTIN_VPCOMTRUEUD,
25748 IX86_BUILTIN_VPCOMEQUQ,
25749 IX86_BUILTIN_VPCOMNEUQ,
25750 IX86_BUILTIN_VPCOMLTUQ,
25751 IX86_BUILTIN_VPCOMLEUQ,
25752 IX86_BUILTIN_VPCOMGTUQ,
25753 IX86_BUILTIN_VPCOMGEUQ,
25754 IX86_BUILTIN_VPCOMFALSEUQ,
25755 IX86_BUILTIN_VPCOMTRUEUQ,
25757 IX86_BUILTIN_VPCOMEQB,
25758 IX86_BUILTIN_VPCOMNEB,
25759 IX86_BUILTIN_VPCOMLTB,
25760 IX86_BUILTIN_VPCOMLEB,
25761 IX86_BUILTIN_VPCOMGTB,
25762 IX86_BUILTIN_VPCOMGEB,
25763 IX86_BUILTIN_VPCOMFALSEB,
25764 IX86_BUILTIN_VPCOMTRUEB,
25766 IX86_BUILTIN_VPCOMEQW,
25767 IX86_BUILTIN_VPCOMNEW,
25768 IX86_BUILTIN_VPCOMLTW,
25769 IX86_BUILTIN_VPCOMLEW,
25770 IX86_BUILTIN_VPCOMGTW,
25771 IX86_BUILTIN_VPCOMGEW,
25772 IX86_BUILTIN_VPCOMFALSEW,
25773 IX86_BUILTIN_VPCOMTRUEW,
25775 IX86_BUILTIN_VPCOMEQD,
25776 IX86_BUILTIN_VPCOMNED,
25777 IX86_BUILTIN_VPCOMLTD,
25778 IX86_BUILTIN_VPCOMLED,
25779 IX86_BUILTIN_VPCOMGTD,
25780 IX86_BUILTIN_VPCOMGED,
25781 IX86_BUILTIN_VPCOMFALSED,
25782 IX86_BUILTIN_VPCOMTRUED,
25784 IX86_BUILTIN_VPCOMEQQ,
25785 IX86_BUILTIN_VPCOMNEQ,
25786 IX86_BUILTIN_VPCOMLTQ,
25787 IX86_BUILTIN_VPCOMLEQ,
25788 IX86_BUILTIN_VPCOMGTQ,
25789 IX86_BUILTIN_VPCOMGEQ,
25790 IX86_BUILTIN_VPCOMFALSEQ,
25791 IX86_BUILTIN_VPCOMTRUEQ,
25793 /* LWP instructions. */
25794 IX86_BUILTIN_LLWPCB,
25795 IX86_BUILTIN_SLWPCB,
25796 IX86_BUILTIN_LWPVAL32,
25797 IX86_BUILTIN_LWPVAL64,
25798 IX86_BUILTIN_LWPINS32,
25799 IX86_BUILTIN_LWPINS64,
25803 /* BMI instructions. */
25804 IX86_BUILTIN_BEXTR32,
25805 IX86_BUILTIN_BEXTR64,
25808 /* TBM instructions. */
25809 IX86_BUILTIN_BEXTRI32,
25810 IX86_BUILTIN_BEXTRI64,
25812 /* BMI2 instructions. */
25813 IX86_BUILTIN_BZHI32,
25814 IX86_BUILTIN_BZHI64,
25815 IX86_BUILTIN_PDEP32,
25816 IX86_BUILTIN_PDEP64,
25817 IX86_BUILTIN_PEXT32,
25818 IX86_BUILTIN_PEXT64,
25820 /* FSGSBASE instructions. */
25821 IX86_BUILTIN_RDFSBASE32,
25822 IX86_BUILTIN_RDFSBASE64,
25823 IX86_BUILTIN_RDGSBASE32,
25824 IX86_BUILTIN_RDGSBASE64,
25825 IX86_BUILTIN_WRFSBASE32,
25826 IX86_BUILTIN_WRFSBASE64,
25827 IX86_BUILTIN_WRGSBASE32,
25828 IX86_BUILTIN_WRGSBASE64,
25830 /* RDRND instructions. */
25831 IX86_BUILTIN_RDRAND16_STEP,
25832 IX86_BUILTIN_RDRAND32_STEP,
25833 IX86_BUILTIN_RDRAND64_STEP,
25835 /* F16C instructions. */
25836 IX86_BUILTIN_CVTPH2PS,
25837 IX86_BUILTIN_CVTPH2PS256,
25838 IX86_BUILTIN_CVTPS2PH,
25839 IX86_BUILTIN_CVTPS2PH256,
25841 /* CFString built-in for Darwin. */
25842 IX86_BUILTIN_CFSTRING,
25844 IX86_BUILTIN_MAX
25845 };
25847 /* Table for the ix86 builtin decls. */
25848 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
25850 /* Table of all of the builtin functions that are possible with different ISAs,
25851 but are waiting to be built until a function is declared to use that
25852 ISA. */
25853 struct builtin_isa {
25854 const char *name; /* function name */
25855 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
25856 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
25857 bool const_p; /* true if the declaration is constant */
25858 bool set_and_not_built_p; /* true while the decl is deferred and not yet built */
25859 };
25861 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
25864 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
25865 of which isa_flags to use in the ix86_builtins_isa array. Stores the
25866 function decl in the ix86_builtins array. Returns the function decl or
25867 NULL_TREE, if the builtin was not added.
25869 If the front end has a special hook for builtin functions, delay adding
25870 builtin functions that aren't in the current ISA until the ISA is changed
25871 with function-specific optimization. Doing so can save about 300K for the
25872 default compiler. When the builtin is expanded, check at that time whether
25873 it is valid.
25875 If the front end doesn't have a special hook, record all builtins, even if
25876 they aren't in the current ISA, in case the user uses
25877 function-specific options for a different ISA, so that we don't get scope
25878 errors if a builtin is added in the middle of a function scope. */
25881 def_builtin (HOST_WIDE_INT mask, const char *name,
25882 enum ix86_builtin_func_type tcode,
25883 enum ix86_builtins code)
25885 tree decl = NULL_TREE;
25887 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
25888 {
25889 ix86_builtins_isa[(int) code].isa = mask;
25891 mask &= ~OPTION_MASK_ISA_64BIT;
25892 if (mask == 0
25893 || (mask & ix86_isa_flags) != 0
25894 || (lang_hooks.builtin_function
25895 == lang_hooks.builtin_function_ext_scope))
25896 {
25898 tree type = ix86_get_builtin_func_type (tcode);
25899 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
25900 NULL, NULL_TREE);
25901 ix86_builtins[(int) code] = decl;
25902 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
25903 }
25904 else
25905 {
25906 ix86_builtins[(int) code] = NULL_TREE;
25907 ix86_builtins_isa[(int) code].tcode = tcode;
25908 ix86_builtins_isa[(int) code].name = name;
25909 ix86_builtins_isa[(int) code].const_p = false;
25910 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
25911 }
25912 }
25914 return decl;
25915 }
25917 /* Like def_builtin, but also marks the function decl "const". */
25920 def_builtin_const (HOST_WIDE_INT mask, const char *name,
25921 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
25923 tree decl = def_builtin (mask, name, tcode, code);
25924 if (decl)
25925 TREE_READONLY (decl) = 1;
25926 else
25927 ix86_builtins_isa[(int) code].const_p = true;
25929 return decl;
25930 }
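/* Usage sketch: the bdesc tables below feed registrations like the
   following (the specific builtin shown here is hypothetical and for
   illustration only): */
#if 0
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_example_addps",
		     V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);
#endif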
25932 /* Add any new builtin functions for a given ISA that may not have been
25933 declared. This saves a bit of space compared to adding all of the
25934 declarations to the tree, even if we didn't use them. */
25937 ix86_add_new_builtins (HOST_WIDE_INT isa)
25938 {
25939 int i;
25941 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
25942 {
25943 if ((ix86_builtins_isa[i].isa & isa) != 0
25944 && ix86_builtins_isa[i].set_and_not_built_p)
25945 {
25946 tree decl, type;
25948 /* Don't define the builtin again. */
25949 ix86_builtins_isa[i].set_and_not_built_p = false;
25951 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
25952 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
25953 type, i, BUILT_IN_MD, NULL,
25954 NULL_TREE);
25956 ix86_builtins[i] = decl;
25957 if (ix86_builtins_isa[i].const_p)
25958 TREE_READONLY (decl) = 1;
25959 }
25960 }
25961 }
25963 /* Bits for builtin_description.flag. */
25965 /* Set when we don't support the comparison natively, and should
25966 swap_comparison in order to support it. */
25967 #define BUILTIN_DESC_SWAP_OPERANDS 1
25969 struct builtin_description
25970 {
25971 const HOST_WIDE_INT mask;
25972 const enum insn_code icode;
25973 const char *const name;
25974 const enum ix86_builtins code;
25975 const enum rtx_code comparison;
25976 const int flag;
25977 };
25979 static const struct builtin_description bdesc_comi[] =
25980 {
25981 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
25982 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
25983 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
25984 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
25985 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
25986 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
25987 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
25988 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
25989 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
25990 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
25991 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
25992 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
25993 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
25994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
25995 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
25996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
25997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
25998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
25999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
26000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
26001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
26002 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
26003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
26004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
26005 };
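/* Example of how a table entry above reaches user code: the first entry
   backs _mm_comieq_ss from <xmmintrin.h>, and its UNEQ comparison reflects
   that comiss reports "equal" for unordered operands as well.  Hypothetical
   user snippet: */
#if 0
#include <xmmintrin.h>

int
example_comieq (__m128 a, __m128 b)
{
  return _mm_comieq_ss (a, b);	/* expands via IX86_BUILTIN_COMIEQSS */
}
#endif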
26007 static const struct builtin_description bdesc_pcmpestr[] =
26008 {
26009 /* SSE4.2 */
26010 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
26011 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
26012 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
26013 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
26014 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
26015 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
26016 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
26017 };
26019 static const struct builtin_description bdesc_pcmpistr[] =
26020 {
26021 /* SSE4.2 */
26022 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
26023 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
26024 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
26025 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
26026 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
26027 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
26028 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
26029 };
26031 /* Special builtins with a variable number of arguments. */
26032 static const struct builtin_description bdesc_special_args[] =
26033 {
26034 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
26035 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
26036 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
26038 /* MMX */
26039 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
26041 /* 3DNow! */
26042 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
26044 /* SSE */
26045 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
26046 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
26047 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
26049 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
26050 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
26051 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
26052 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
26054 /* SSE or 3DNow!A */
26055 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26056 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
26058 /* SSE2 */
26059 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26061 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
26062 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
26063 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
26064 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
26065 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
26066 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
26067 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
26068 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
26070 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
26071 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
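  /* Note that all eight FSGSBASE entries carry OPTION_MASK_ISA_64BIT
     even for the "32" variants: the rd/wr fs/gs base instructions only
     exist in 64-bit mode, and the 32/64 suffix merely selects the
     operand width.  */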
};

/* Builtins with variable number of arguments. */
static const struct builtin_description bdesc_args[] =
{
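  /* Some prototype codes below carry expansion hints as suffixes:
     _COUNT marks a shift amount that may come as a scalar or as a
     vector operand, _SWAP tells the expander to exchange the two input
     operands before emitting the pattern (used to derive GT/GE from the
     LT/LE patterns), _CONVERT flags an operand whose mode differs from
     the pattern's and must be converted, and _VEC_MERGE marks scalar
     insns that merge with the destination's upper elements.  (Summary
     inferred from the uses in this table.)  */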
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
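  /* pfrcpit1/pfrcpit2 and pfrsqit1 above are the Newton-Raphson
     refinement steps that pair with the pfrcp/pfrsqrt seed
     approximations to reach full single-precision accuracy.  */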

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
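  /* Note how the "greater" variants above reuse the LT/LE/UNGE/UNGT
     comparisons together with the _SWAP prototypes: the SSE compare
     encodings only provide eq/lt/le/unord/neq/nlt/nle/ord predicates,
     so cmpgtps is emitted as cmpltps with the operands exchanged.  */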

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
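  /* pmuludq is a widening multiply: it takes the even-indexed unsigned
     32-bit lanes of each input and produces full 64-bit products, hence
     the V1DI_FTYPE_V2SI_V2SI and V2DI_FTYPE_V4SI_V4SI prototypes with
     half as many output lanes as input lanes.  */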

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
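  /* The whole-register byte shifts (pslldqi128/psrldqi128) are typed
     V2DI but expand through the V1TI shift patterns; the _INT_CONVERT
     prototypes mark that mode change.  Their immediate is a bit count,
     which is why the _mm_slli_si128/_mm_srli_si128 intrinsics scale
     their byte argument by 8 before reaching these builtins.  */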

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
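  /* A name of 0 (as for IX86_BUILTIN_FABSQ and IX86_BUILTIN_COPYSIGNQ
     above, and IX86_BUILTIN_MFENCE earlier) means the registration loop
     skips the entry; such builtins are declared by hand elsewhere, and
     the table entry only supplies the expander pattern at expansion
     time.  */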

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26561 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
26562 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
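
/* SSE4.1 */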
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
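
/* SSE4.1 rounding and ptest builtins, gated by OPTION_MASK_ISA_ROUND.  */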
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
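
/* SSE4.2 */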
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
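
/* SSE4A */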
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
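
/* AES */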
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
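
/* PCLMUL */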
{ OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
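
/* AVX */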
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
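
/* AVX2 */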
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
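
/* LZCNT */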
{ OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
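
/* BMI */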
{ OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
{ OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
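
/* TBM */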
{ OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
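
/* F16C */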
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
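
/* BMI2 */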
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};

/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
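
/* Each MULTI_ARG_* macro above is just a readable alias for one of the
   function-type codes: the digit gives the operand count and the suffix
   the element mode, with a trailing 2 marking the 256-bit variant
   (e.g. MULTI_ARG_3_SF2 is V8SF_FTYPE_V8SF_V8SF_V8SF).  */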
static const struct builtin_description bdesc_multi_arg[] =
{
27031 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
27032 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
27033 UNKNOWN, (int)MULTI_ARG_3_SF },
27034 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
27035 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
27036 UNKNOWN, (int)MULTI_ARG_3_DF },
27038 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
27039 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
27040 UNKNOWN, (int)MULTI_ARG_3_SF },
27041 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
27042 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
27043 UNKNOWN, (int)MULTI_ARG_3_DF },
27045 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
27046 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
27047 UNKNOWN, (int)MULTI_ARG_3_SF },
27048 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
27049 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
27050 UNKNOWN, (int)MULTI_ARG_3_DF },
27051 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
27052 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
27053 UNKNOWN, (int)MULTI_ARG_3_SF2 },
27054 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
27055 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
27056 UNKNOWN, (int)MULTI_ARG_3_DF2 },
27058 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
27059 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
27060 UNKNOWN, (int)MULTI_ARG_3_SF },
27061 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
27062 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
27063 UNKNOWN, (int)MULTI_ARG_3_DF },
27064 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
27065 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
27066 UNKNOWN, (int)MULTI_ARG_3_SF2 },
27067 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
27068 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
27069 UNKNOWN, (int)MULTI_ARG_3_DF2 },
27071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
27072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
27073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
27074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
27075 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
27076 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
27077 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
27079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
27080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
27081 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
27082 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
27083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
27084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
27085 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
27087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
27089 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
27090 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
27091 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
27092 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
27093 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
27094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
27095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27097 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27098 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27099 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
27100 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
27102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
27103 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
27104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
27105 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
27106 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
27107 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
27108 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
27109 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
27110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
27111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
27112 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
27113 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
27114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
27115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
27116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
27117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
27119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
27120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
27121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
27122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
27123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
27124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
27126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
27127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
27128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
27129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
27130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
27131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
27132 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
27133 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
27134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
27135 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
27136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
27137 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
27138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
27139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
27140 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
27142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
27143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
27144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
27145 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
27146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
27147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
27148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
27150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
27151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
27152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
27153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
27154 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
27155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
27156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
27158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
27159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
27160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
27161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
27162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
27163 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
27164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
27166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
27167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
27168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
27169 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
27170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
27171 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
27172 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
27174 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
27175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
27176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
27177 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
27178 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
27179 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
27180 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
27182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
27183 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
27184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
27185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
27186 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
27187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
27188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
27190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
27191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
27192 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
27193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
27194 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
27195 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
27196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
27198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
27199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
27200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
27201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
27202 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
27203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
27204 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
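/* Editorial note: each VPCOM row above reuses one maskcmp pattern and
   records the condition (EQ, NE, LT, ...) in the COMPARISON field; the
   expander wraps the operands in an rtx of that code.  Per element the
   result is all-ones or all-zeros, e.g. for vpcomltb:

     res[i] = ((signed char) a[i] < (signed char) b[i]) ? -1 : 0;  */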
27206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
27207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
27208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
27209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
27210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
27211 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
27212 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
27213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
27215 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
27216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
27217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
27218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
27219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
27220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
27221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
27222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
27224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
27225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
27226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
27227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
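/* Editorial sketch (not part of the original source): VPCMOV is a bit-wise
   select, dst = (a & c) | (b & ~c), applied across the whole register; the
   element-typed variants above all expand to the same xop_pcmov pattern.
   A scalar model of one 64-bit lane, assuming only ISO C:  */

static unsigned long long
pcmov_ref (unsigned long long a, unsigned long long b, unsigned long long c)
{
  return (a & c) | (b & ~c);	/* C selects between A and B per bit */
}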
27231 /* TM vector builtins. */
27233 /* Reuse the existing x86-specific `struct builtin_description' because
27234    we're lazy.  Add casts to make them fit.  */
27235 static const struct builtin_description bdesc_tm[] =
27237 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
27238 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
27239 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
27240 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
27241 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
27242 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
27243 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
27245 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
27246 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
27247 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
27248 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
27249 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
27250 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
27251 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
27253 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
27254 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
27255 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
27256 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
27257 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
27258 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
27259 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
27261 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
27262 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
27263 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
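/* Editorial note: the __builtin__ITM_* suffixes follow the libitm ABI naming
   scheme: W = write, WaR = write-after-read, WaW = write-after-write,
   R = read, RaR = read-after-read, RaW = read-after-write,
   RfW = read-for-write, and L = log; the trailing M64/M128/M256 gives the
   access width in bits, which is why the entries are gated on MMX, SSE and
   AVX respectively.  */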
27266 /* TM callbacks. */
27268 /* Return the builtin decl needed to load a vector of TYPE. */
27270 static tree
27271 ix86_builtin_tm_load (tree type)
27272 {
27273   if (TREE_CODE (type) == VECTOR_TYPE)
27274     {
27275       switch (tree_low_cst (TYPE_SIZE (type), 1))
27276 	{
27277 	case 64:
27278 	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
27279 	case 128:
27280 	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
27281 	case 256:
27282 	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
27283 	}
27284     }
27285   return NULL_TREE;
27286 }
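/* For example (editorial note): a V4SF vector has a TYPE_SIZE of 128 bits,
   so the switch above yields BUILT_IN_TM_LOAD_M128, i.e. the
   __builtin__ITM_RM128 entry registered in bdesc_tm.  */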
27288 /* Return the builtin decl needed to store a vector of TYPE. */
27290 static tree
27291 ix86_builtin_tm_store (tree type)
27292 {
27293   if (TREE_CODE (type) == VECTOR_TYPE)
27294     {
27295       switch (tree_low_cst (TYPE_SIZE (type), 1))
27296 	{
27297 	case 64:
27298 	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
27299 	case 128:
27300 	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
27301 	case 256:
27302 	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
27303 	}
27304     }
27305   return NULL_TREE;
27306 }
27308 /* Initialize the transactional memory vector load/store builtins. */
27310 static void
27311 ix86_init_tm_builtins (void)
27313 enum ix86_builtin_func_type ftype;
27314 const struct builtin_description *d;
27317 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
27318 tree attrs_log, attrs_type_log;
27323 /* If there are no builtins defined, we must be compiling in a
27324 language without trans-mem support. */
27325 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
27328 /* Use whatever attributes a normal TM load has. */
27329 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
27330 attrs_load = DECL_ATTRIBUTES (decl);
27331 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
27332 /* Use whatever attributes a normal TM store has. */
27333 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
27334 attrs_store = DECL_ATTRIBUTES (decl);
27335 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
27336 /* Use whatever attributes a normal TM log has. */
27337 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
27338 attrs_log = DECL_ATTRIBUTES (decl);
27339 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
27341 for (i = 0, d = bdesc_tm;
27342 i < ARRAY_SIZE (bdesc_tm);
27345 if ((d->mask & ix86_isa_flags) != 0
27346 || (lang_hooks.builtin_function
27347 == lang_hooks.builtin_function_ext_scope))
27349 tree type, attrs, attrs_type;
27350 enum built_in_function code = (enum built_in_function) d->code;
27352 ftype = (enum ix86_builtin_func_type) d->flag;
27353 type = ix86_get_builtin_func_type (ftype);
27355 if (BUILTIN_TM_LOAD_P (code))
27357 attrs = attrs_load;
27358 attrs_type = attrs_type_load;
27360 else if (BUILTIN_TM_STORE_P (code))
27362 attrs = attrs_store;
27363 attrs_type = attrs_type_store;
27368 attrs_type = attrs_type_log;
27370 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
27371 /* The builtin without the prefix for
27372 calling it directly. */
27373 d->name + strlen ("__builtin_"),
27375 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
27376 set the TYPE_ATTRIBUTES. */
27377 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
27379 set_builtin_decl (code, decl, false);
27384 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
27385    in the current target ISA to allow the user to compile particular modules
27386    with different target specific options that differ from the command line
27387    options.  */
27388 static void
27389 ix86_init_mmx_sse_builtins (void)
27391 const struct builtin_description * d;
27392 enum ix86_builtin_func_type ftype;
27395 /* Add all special builtins with variable number of operands. */
27396 for (i = 0, d = bdesc_special_args;
27397 i < ARRAY_SIZE (bdesc_special_args);
27403 ftype = (enum ix86_builtin_func_type) d->flag;
27404 def_builtin (d->mask, d->name, ftype, d->code);
27407 /* Add all builtins with variable number of operands. */
27408 for (i = 0, d = bdesc_args;
27409 i < ARRAY_SIZE (bdesc_args);
27415 ftype = (enum ix86_builtin_func_type) d->flag;
27416 def_builtin_const (d->mask, d->name, ftype, d->code);
27419 /* pcmpestr[im] insns. */
27420 for (i = 0, d = bdesc_pcmpestr;
27421 i < ARRAY_SIZE (bdesc_pcmpestr);
27424 if (d->code == IX86_BUILTIN_PCMPESTRM128)
27425 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
27427 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
27428 def_builtin_const (d->mask, d->name, ftype, d->code);
27431 /* pcmpistr[im] insns. */
27432 for (i = 0, d = bdesc_pcmpistr;
27433 i < ARRAY_SIZE (bdesc_pcmpistr);
27436 if (d->code == IX86_BUILTIN_PCMPISTRM128)
27437 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
27439 ftype = INT_FTYPE_V16QI_V16QI_INT;
27440 def_builtin_const (d->mask, d->name, ftype, d->code);
27443 /* comi/ucomi insns. */
27444 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27446 if (d->mask == OPTION_MASK_ISA_SSE2)
27447 ftype = INT_FTYPE_V2DF_V2DF;
27449 ftype = INT_FTYPE_V4SF_V4SF;
27450 def_builtin_const (d->mask, d->name, ftype, d->code);
27454 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
27455 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
27456 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
27457 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
27459 /* SSE or 3DNow!A */
27460 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
27461 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
27462 IX86_BUILTIN_MASKMOVQ);
27465 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
27466 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
27468 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
27469 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
27470 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
27471 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
27474 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
27475 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
27476 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
27477 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
27480 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
27481 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
27482 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
27483 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
27484 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
27485 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
27486 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
27487 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
27488 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
27489 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
27490 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
27491 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
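/* Editorial note: these correspond one-to-one to the AES-NI instructions;
   e.g. __builtin_ia32_aesenc128 (state, key) performs one middle round of
   AES encryption (ShiftRows, SubBytes, MixColumns, then XOR with the round
   key) on a V2DI-typed 128-bit state.  */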
27494 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
27495 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
27498 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
27499 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
27500 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
27501 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
27502 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
27503 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
27504 IX86_BUILTIN_RDRAND64_STEP);
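/* Editorial sketch (not part of the original source): the *_step builtins
   mirror the RDRAND protocol: they store a random value through the pointer
   argument and return the carry flag, where 0 means the hardware had no
   entropy ready.  Typical use, assuming only the 32-bit builtin registered
   above:  */

static unsigned int
rdrand32_retry (void)
{
  unsigned int val;
  while (!__builtin_ia32_rdrand32_step (&val))
    ;				/* not ready yet; retry */
  return val;
}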
27507 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
27508 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
27509 IX86_BUILTIN_GATHERSIV2DF);
27511 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
27512 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
27513 IX86_BUILTIN_GATHERSIV4DF);
27515 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
27516 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
27517 IX86_BUILTIN_GATHERDIV2DF);
27519 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
27520 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
27521 IX86_BUILTIN_GATHERDIV4DF);
27523 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
27524 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
27525 IX86_BUILTIN_GATHERSIV4SF);
27527 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
27528 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
27529 IX86_BUILTIN_GATHERSIV8SF);
27531 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
27532 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
27533 IX86_BUILTIN_GATHERDIV4SF);
27535 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
27536 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
27537 IX86_BUILTIN_GATHERDIV8SF);
27539 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
27540 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
27541 IX86_BUILTIN_GATHERSIV2DI);
27543 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
27544 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
27545 IX86_BUILTIN_GATHERSIV4DI);
27547 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
27548 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
27549 IX86_BUILTIN_GATHERDIV2DI);
27551 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
27552 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
27553 IX86_BUILTIN_GATHERDIV4DI);
27555 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
27556 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
27557 IX86_BUILTIN_GATHERSIV4SI);
27559 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
27560 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
27561 IX86_BUILTIN_GATHERSIV8SI);
27563 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
27564 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
27565 IX86_BUILTIN_GATHERDIV4SI);
27567 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
27568 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
27569 IX86_BUILTIN_GATHERDIV8SI);
27571 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
27572 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
27573 IX86_BUILTIN_GATHERALTSIV4DF);
27575 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
27576 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
27577 IX86_BUILTIN_GATHERALTDIV8SF);
27579 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
27580 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
27581 IX86_BUILTIN_GATHERALTSIV4DI);
27583 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
27584 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
27585 IX86_BUILTIN_GATHERALTDIV8SI);
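/* Editorial sketch (not part of the original source): each gather builtin
   loads, for every element whose mask lane has its sign bit set,
   *(base + index[i] * scale), and keeps the pass-through source lane
   otherwise.  A scalar model of gathersiv4sf, modeling each mask lane by
   its sign bit, assuming only ISO C and ignoring alignment and faulting
   behavior:  */

static void
gathersiv4sf_ref (float dst[4], const float src[4], const char *base,
		  const int index[4], const int mask[4], int scale)
{
  int i;
  for (i = 0; i < 4; i++)
    if (mask[i] < 0)		/* sign bit of the mask lane is set */
      dst[i] = *(const float *) (base + (long long) index[i] * scale);
    else
      dst[i] = src[i];		/* masked-off lane keeps the source */
}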
27587 /* MMX access to the vec_init patterns. */
27588 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
27589 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
27591 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
27592 V4HI_FTYPE_HI_HI_HI_HI,
27593 IX86_BUILTIN_VEC_INIT_V4HI);
27595 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
27596 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
27597 IX86_BUILTIN_VEC_INIT_V8QI);
27599 /* Access to the vec_extract patterns. */
27600 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
27601 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
27602 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
27603 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
27604 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
27605 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
27606 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
27607 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
27608 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
27609 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
27611 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
27612 "__builtin_ia32_vec_ext_v4hi",
27613 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
27615 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
27616 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
27618 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
27619 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
27621 /* Access to the vec_set patterns. */
27622 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
27623 "__builtin_ia32_vec_set_v2di",
27624 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
27626 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
27627 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
27629 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
27630 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
27632 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
27633 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
27635 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
27636 "__builtin_ia32_vec_set_v4hi",
27637 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
27639 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
27640 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
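/* Editorial note: these builtins map directly onto the vec_init, vec_extract
   and vec_set named patterns; with types abbreviated, their use looks like

     v2si v = __builtin_ia32_vec_init_v2si (1, 2);	// build {1, 2}
     int  x = __builtin_ia32_vec_ext_v4si (u, 0);	// read lane 0
     u = __builtin_ia32_vec_set_v4si (u, 42, 3);	// write 42 to lane 3

   where the exact prototypes come from the FTYPE codes above.  */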
27642 /* Add the FMA4, FMA and XOP multi-argument builtins.  */
27643 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27648 ftype = (enum ix86_builtin_func_type) d->flag;
27649 def_builtin_const (d->mask, d->name, ftype, d->code);
27653 /* Internal method for ix86_init_builtins. */
27655 static void
27656 ix86_init_builtins_va_builtins_abi (void)
27658 tree ms_va_ref, sysv_va_ref;
27659 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
27660 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
27661 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
27662 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
27666 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
27667 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
27668 ms_va_ref = build_reference_type (ms_va_list_type_node);
27670 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
27673 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
27674 fnvoid_va_start_ms =
27675 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
27676 fnvoid_va_end_sysv =
27677 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
27678 fnvoid_va_start_sysv =
27679 build_varargs_function_type_list (void_type_node, sysv_va_ref,
27681 fnvoid_va_copy_ms =
27682 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
27684 fnvoid_va_copy_sysv =
27685 build_function_type_list (void_type_node, sysv_va_ref,
27686 sysv_va_ref, NULL_TREE);
27688 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
27689 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
27690 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
27691 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
27692 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
27693 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
27694 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
27695 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
27696 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
27697 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
27698 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
27699 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
27700 }
27702 static void
27703 ix86_init_builtin_types (void)
27705 tree float128_type_node, float80_type_node;
27707 /* The __float80 type. */
27708 float80_type_node = long_double_type_node;
27709 if (TYPE_MODE (float80_type_node) != XFmode)
27711 /* long double is not XFmode; build a distinct 80-bit type.  */
27712 float80_type_node = make_node (REAL_TYPE);
27714 TYPE_PRECISION (float80_type_node) = 80;
27715 layout_type (float80_type_node);
27717 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
27719 /* The __float128 type. */
27720 float128_type_node = make_node (REAL_TYPE);
27721 TYPE_PRECISION (float128_type_node) = 128;
27722 layout_type (float128_type_node);
27723 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
27725 /* This macro is built by i386-builtin-types.awk. */
27726 DEFINE_BUILTIN_PRIMITIVE_TYPES;
27727 }
27729 static void
27730 ix86_init_builtins (void)
27734 ix86_init_builtin_types ();
27736 /* TFmode support builtins. */
27737 def_builtin_const (0, "__builtin_infq",
27738 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
27739 def_builtin_const (0, "__builtin_huge_valq",
27740 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
27742 /* We will expand them to a normal call if SSE2 isn't available, since
27743 they are used by libgcc. */
27744 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
27745 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
27746 BUILT_IN_MD, "__fabstf2", NULL_TREE);
27747 TREE_READONLY (t) = 1;
27748 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
27750 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
27751 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
27752 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
27753 TREE_READONLY (t) = 1;
27754 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
27756 ix86_init_tm_builtins ();
27757 ix86_init_mmx_sse_builtins ();
27760 ix86_init_builtins_va_builtins_abi ();
27762 #ifdef SUBTARGET_INIT_BUILTINS
27763 SUBTARGET_INIT_BUILTINS;
27767 /* Return the ix86 builtin for CODE. */
27769 static tree
27770 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
27772 if (code >= IX86_BUILTIN_MAX)
27773 return error_mark_node;
27775 return ix86_builtins[code];
27778 /* Errors in the source file can cause expand_expr to return const0_rtx
27779 where we expect a vector. To avoid crashing, use one of the vector
27780 clear instructions. */
27781 static rtx
27782 safe_vector_operand (rtx x, enum machine_mode mode)
27783 {
27784   if (x == const0_rtx)
27785     x = CONST0_RTX (mode);
27786   return x;
27787 }
27789 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
27791 static rtx
27792 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
27795 tree arg0 = CALL_EXPR_ARG (exp, 0);
27796 tree arg1 = CALL_EXPR_ARG (exp, 1);
27797 rtx op0 = expand_normal (arg0);
27798 rtx op1 = expand_normal (arg1);
27799 enum machine_mode tmode = insn_data[icode].operand[0].mode;
27800 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
27801 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
27803 if (VECTOR_MODE_P (mode0))
27804 op0 = safe_vector_operand (op0, mode0);
27805 if (VECTOR_MODE_P (mode1))
27806 op1 = safe_vector_operand (op1, mode1);
27808 if (optimize || !target
27809 || GET_MODE (target) != tmode
27810 || !insn_data[icode].operand[0].predicate (target, tmode))
27811 target = gen_reg_rtx (tmode);
27813 if (GET_MODE (op1) == SImode && mode1 == TImode)
27815 rtx x = gen_reg_rtx (V4SImode);
27816 emit_insn (gen_sse2_loadd (x, op1));
27817 op1 = gen_lowpart (TImode, x);
27820 if (!insn_data[icode].operand[1].predicate (op0, mode0))
27821 op0 = copy_to_mode_reg (mode0, op0);
27822 if (!insn_data[icode].operand[2].predicate (op1, mode1))
27823 op1 = copy_to_mode_reg (mode1, op1);
27825   pat = GEN_FCN (icode) (target, op0, op1);
27826   if (! pat)
27827     return 0;
27828   emit_insn (pat);
27829   return target;
27830 }
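/* Editorial note: this is the canonical expander shape used by the rest of
   this file: pick (or create) a TARGET register in the pattern's result
   mode, force any operand that fails its predicate into a register, and
   emit the insn built by GEN_FCN.  The multi-argument and comparison
   expanders below follow the same scheme with extra operand handling.  */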
27834 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
27836 static rtx
27837 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
27838 enum ix86_builtin_func_type m_type,
27839 enum rtx_code sub_code)
27844 bool comparison_p = false;
27846 bool last_arg_constant = false;
27847 int num_memory = 0;
27850 enum machine_mode mode;
27853 enum machine_mode tmode = insn_data[icode].operand[0].mode;
27857 case MULTI_ARG_4_DF2_DI_I:
27858 case MULTI_ARG_4_DF2_DI_I1:
27859 case MULTI_ARG_4_SF2_SI_I:
27860 case MULTI_ARG_4_SF2_SI_I1:
27862 last_arg_constant = true;
27865 case MULTI_ARG_3_SF:
27866 case MULTI_ARG_3_DF:
27867 case MULTI_ARG_3_SF2:
27868 case MULTI_ARG_3_DF2:
27869 case MULTI_ARG_3_DI:
27870 case MULTI_ARG_3_SI:
27871 case MULTI_ARG_3_SI_DI:
27872 case MULTI_ARG_3_HI:
27873 case MULTI_ARG_3_HI_SI:
27874 case MULTI_ARG_3_QI:
27875 case MULTI_ARG_3_DI2:
27876 case MULTI_ARG_3_SI2:
27877 case MULTI_ARG_3_HI2:
27878 case MULTI_ARG_3_QI2:
27882 case MULTI_ARG_2_SF:
27883 case MULTI_ARG_2_DF:
27884 case MULTI_ARG_2_DI:
27885 case MULTI_ARG_2_SI:
27886 case MULTI_ARG_2_HI:
27887 case MULTI_ARG_2_QI:
27891 case MULTI_ARG_2_DI_IMM:
27892 case MULTI_ARG_2_SI_IMM:
27893 case MULTI_ARG_2_HI_IMM:
27894 case MULTI_ARG_2_QI_IMM:
27896 last_arg_constant = true;
27899 case MULTI_ARG_1_SF:
27900 case MULTI_ARG_1_DF:
27901 case MULTI_ARG_1_SF2:
27902 case MULTI_ARG_1_DF2:
27903 case MULTI_ARG_1_DI:
27904 case MULTI_ARG_1_SI:
27905 case MULTI_ARG_1_HI:
27906 case MULTI_ARG_1_QI:
27907 case MULTI_ARG_1_SI_DI:
27908 case MULTI_ARG_1_HI_DI:
27909 case MULTI_ARG_1_HI_SI:
27910 case MULTI_ARG_1_QI_DI:
27911 case MULTI_ARG_1_QI_SI:
27912 case MULTI_ARG_1_QI_HI:
27916 case MULTI_ARG_2_DI_CMP:
27917 case MULTI_ARG_2_SI_CMP:
27918 case MULTI_ARG_2_HI_CMP:
27919 case MULTI_ARG_2_QI_CMP:
27921 comparison_p = true;
27924 case MULTI_ARG_2_SF_TF:
27925 case MULTI_ARG_2_DF_TF:
27926 case MULTI_ARG_2_DI_TF:
27927 case MULTI_ARG_2_SI_TF:
27928 case MULTI_ARG_2_HI_TF:
27929 case MULTI_ARG_2_QI_TF:
27935 gcc_unreachable ();
27938 if (optimize || !target
27939 || GET_MODE (target) != tmode
27940 || !insn_data[icode].operand[0].predicate (target, tmode))
27941 target = gen_reg_rtx (tmode);
27943 gcc_assert (nargs <= 4);
27945 for (i = 0; i < nargs; i++)
27947 tree arg = CALL_EXPR_ARG (exp, i);
27948 rtx op = expand_normal (arg);
27949 int adjust = (comparison_p) ? 1 : 0;
27950 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
27952 if (last_arg_constant && i == nargs - 1)
27954 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
27956 enum insn_code new_icode = icode;
27959 case CODE_FOR_xop_vpermil2v2df3:
27960 case CODE_FOR_xop_vpermil2v4sf3:
27961 case CODE_FOR_xop_vpermil2v4df3:
27962 case CODE_FOR_xop_vpermil2v8sf3:
27963 error ("the last argument must be a 2-bit immediate");
27964 return gen_reg_rtx (tmode);
27965 case CODE_FOR_xop_rotlv2di3:
27966 new_icode = CODE_FOR_rotlv2di3;
27968 case CODE_FOR_xop_rotlv4si3:
27969 new_icode = CODE_FOR_rotlv4si3;
27971 case CODE_FOR_xop_rotlv8hi3:
27972 new_icode = CODE_FOR_rotlv8hi3;
27974 case CODE_FOR_xop_rotlv16qi3:
27975 new_icode = CODE_FOR_rotlv16qi3;
27977 if (CONST_INT_P (op))
27979 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
27980 op = GEN_INT (INTVAL (op) & mask);
27981 gcc_checking_assert
27982 (insn_data[icode].operand[i + 1].predicate (op, mode));
27986 gcc_checking_assert
27988 && insn_data[new_icode].operand[0].mode == tmode
27989 && insn_data[new_icode].operand[1].mode == tmode
27990 && insn_data[new_icode].operand[2].mode == mode
27991 && insn_data[new_icode].operand[0].predicate
27992 == insn_data[icode].operand[0].predicate
27993 && insn_data[new_icode].operand[1].predicate
27994 == insn_data[icode].operand[1].predicate);
28000 gcc_unreachable ();
28007 if (VECTOR_MODE_P (mode))
28008 op = safe_vector_operand (op, mode);
28010 	  /* If we aren't optimizing, only allow one memory operand to be
28011 	     generated.  */
28012 if (memory_operand (op, mode))
28015 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
28018 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
28020 op = force_reg (mode, op);
28024 args[i].mode = mode;
28030 pat = GEN_FCN (icode) (target, args[0].op);
28035 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
28036 GEN_INT ((int)sub_code));
28037 else if (! comparison_p)
28038 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
28041 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
28045 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
28050 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
28054 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
28058 gcc_unreachable ();
28068 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
28069 insns with vec_merge. */
28071 static rtx
28072 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
28076 tree arg0 = CALL_EXPR_ARG (exp, 0);
28077 rtx op1, op0 = expand_normal (arg0);
28078 enum machine_mode tmode = insn_data[icode].operand[0].mode;
28079 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
28081 if (optimize || !target
28082 || GET_MODE (target) != tmode
28083 || !insn_data[icode].operand[0].predicate (target, tmode))
28084 target = gen_reg_rtx (tmode);
28086 if (VECTOR_MODE_P (mode0))
28087 op0 = safe_vector_operand (op0, mode0);
28089 if ((optimize && !register_operand (op0, mode0))
28090 || !insn_data[icode].operand[1].predicate (op0, mode0))
28091 op0 = copy_to_mode_reg (mode0, op0);
28093   op1 = op0;
28094   if (!insn_data[icode].operand[2].predicate (op1, mode0))
28095 op1 = copy_to_mode_reg (mode0, op1);
28097 pat = GEN_FCN (icode) (target, op0, op1);
28104 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
28106 static rtx
28107 ix86_expand_sse_compare (const struct builtin_description *d,
28108 tree exp, rtx target, bool swap)
28111 tree arg0 = CALL_EXPR_ARG (exp, 0);
28112 tree arg1 = CALL_EXPR_ARG (exp, 1);
28113 rtx op0 = expand_normal (arg0);
28114 rtx op1 = expand_normal (arg1);
28116 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
28117 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
28118 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
28119 enum rtx_code comparison = d->comparison;
28121 if (VECTOR_MODE_P (mode0))
28122 op0 = safe_vector_operand (op0, mode0);
28123 if (VECTOR_MODE_P (mode1))
28124 op1 = safe_vector_operand (op1, mode1);
28126   /* Swap operands if we have a comparison that isn't available in
28127      hardware.  */
28128   if (swap)
28129     {
28130       rtx tmp = gen_reg_rtx (mode1);
28131       emit_move_insn (tmp, op1);
28132       op1 = op0;
28133       op0 = tmp;
28134     }
28136 if (optimize || !target
28137 || GET_MODE (target) != tmode
28138 || !insn_data[d->icode].operand[0].predicate (target, tmode))
28139 target = gen_reg_rtx (tmode);
28141 if ((optimize && !register_operand (op0, mode0))
28142 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
28143 op0 = copy_to_mode_reg (mode0, op0);
28144 if ((optimize && !register_operand (op1, mode1))
28145 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
28146 op1 = copy_to_mode_reg (mode1, op1);
28148 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
28149 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
28156 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
28158 static rtx
28159 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
28163 tree arg0 = CALL_EXPR_ARG (exp, 0);
28164 tree arg1 = CALL_EXPR_ARG (exp, 1);
28165 rtx op0 = expand_normal (arg0);
28166 rtx op1 = expand_normal (arg1);
28167 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
28168 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
28169 enum rtx_code comparison = d->comparison;
28171 if (VECTOR_MODE_P (mode0))
28172 op0 = safe_vector_operand (op0, mode0);
28173 if (VECTOR_MODE_P (mode1))
28174 op1 = safe_vector_operand (op1, mode1);
28176   /* Swap operands if we have a comparison that isn't available in
28177      hardware.  */
28178   if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
28179     {
28180       rtx tmp = op1;
28181       op1 = op0;
28182       op0 = tmp;
28183     }
28185 target = gen_reg_rtx (SImode);
28186 emit_move_insn (target, const0_rtx);
28187 target = gen_rtx_SUBREG (QImode, target, 0);
28189 if ((optimize && !register_operand (op0, mode0))
28190 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
28191 op0 = copy_to_mode_reg (mode0, op0);
28192 if ((optimize && !register_operand (op1, mode1))
28193 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
28194 op1 = copy_to_mode_reg (mode1, op1);
28196 pat = GEN_FCN (d->icode) (op0, op1);
28200 emit_insn (gen_rtx_SET (VOIDmode,
28201 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28202 gen_rtx_fmt_ee (comparison, QImode,
28206 return SUBREG_REG (target);
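/* Editorial note: the SImode clear / QImode subreg / STRICT_LOW_PART
   sequence above materializes a flags comparison as a 0/1 value while
   avoiding a partial-register stall: the whole SImode register is zeroed
   first and the setcc writes only its low byte.  The ptest expander below
   uses the same idiom.  */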
28209 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
28211 static rtx
28212 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
28216 tree arg0 = CALL_EXPR_ARG (exp, 0);
28217 rtx op1, op0 = expand_normal (arg0);
28218 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
28219 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
28221 if (optimize || target == 0
28222 || GET_MODE (target) != tmode
28223 || !insn_data[d->icode].operand[0].predicate (target, tmode))
28224 target = gen_reg_rtx (tmode);
28226 if (VECTOR_MODE_P (mode0))
28227 op0 = safe_vector_operand (op0, mode0);
28229 if ((optimize && !register_operand (op0, mode0))
28230 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
28231 op0 = copy_to_mode_reg (mode0, op0);
28233 op1 = GEN_INT (d->comparison);
28235 pat = GEN_FCN (d->icode) (target, op0, op1);
28242 static rtx
28243 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
28244 tree exp, rtx target)
28247 tree arg0 = CALL_EXPR_ARG (exp, 0);
28248 tree arg1 = CALL_EXPR_ARG (exp, 1);
28249 rtx op0 = expand_normal (arg0);
28250 rtx op1 = expand_normal (arg1);
28252 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
28253 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
28254 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
28256 if (optimize || target == 0
28257 || GET_MODE (target) != tmode
28258 || !insn_data[d->icode].operand[0].predicate (target, tmode))
28259 target = gen_reg_rtx (tmode);
28261 op0 = safe_vector_operand (op0, mode0);
28262 op1 = safe_vector_operand (op1, mode1);
28264 if ((optimize && !register_operand (op0, mode0))
28265 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
28266 op0 = copy_to_mode_reg (mode0, op0);
28267 if ((optimize && !register_operand (op1, mode1))
28268 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
28269 op1 = copy_to_mode_reg (mode1, op1);
28271 op2 = GEN_INT (d->comparison);
28273 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
28280 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
28282 static rtx
28283 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
28287 tree arg0 = CALL_EXPR_ARG (exp, 0);
28288 tree arg1 = CALL_EXPR_ARG (exp, 1);
28289 rtx op0 = expand_normal (arg0);
28290 rtx op1 = expand_normal (arg1);
28291 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
28292 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
28293 enum rtx_code comparison = d->comparison;
28295 if (VECTOR_MODE_P (mode0))
28296 op0 = safe_vector_operand (op0, mode0);
28297 if (VECTOR_MODE_P (mode1))
28298 op1 = safe_vector_operand (op1, mode1);
28300 target = gen_reg_rtx (SImode);
28301 emit_move_insn (target, const0_rtx);
28302 target = gen_rtx_SUBREG (QImode, target, 0);
28304 if ((optimize && !register_operand (op0, mode0))
28305 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
28306 op0 = copy_to_mode_reg (mode0, op0);
28307 if ((optimize && !register_operand (op1, mode1))
28308 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
28309 op1 = copy_to_mode_reg (mode1, op1);
28311 pat = GEN_FCN (d->icode) (op0, op1);
28315 emit_insn (gen_rtx_SET (VOIDmode,
28316 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28317 gen_rtx_fmt_ee (comparison, QImode,
28321 return SUBREG_REG (target);
28324 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
28326 static rtx
28327 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
28328 tree exp, rtx target)
28331 tree arg0 = CALL_EXPR_ARG (exp, 0);
28332 tree arg1 = CALL_EXPR_ARG (exp, 1);
28333 tree arg2 = CALL_EXPR_ARG (exp, 2);
28334 tree arg3 = CALL_EXPR_ARG (exp, 3);
28335 tree arg4 = CALL_EXPR_ARG (exp, 4);
28336 rtx scratch0, scratch1;
28337 rtx op0 = expand_normal (arg0);
28338 rtx op1 = expand_normal (arg1);
28339 rtx op2 = expand_normal (arg2);
28340 rtx op3 = expand_normal (arg3);
28341 rtx op4 = expand_normal (arg4);
28342 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
28344 tmode0 = insn_data[d->icode].operand[0].mode;
28345 tmode1 = insn_data[d->icode].operand[1].mode;
28346 modev2 = insn_data[d->icode].operand[2].mode;
28347 modei3 = insn_data[d->icode].operand[3].mode;
28348 modev4 = insn_data[d->icode].operand[4].mode;
28349 modei5 = insn_data[d->icode].operand[5].mode;
28350 modeimm = insn_data[d->icode].operand[6].mode;
28352 if (VECTOR_MODE_P (modev2))
28353 op0 = safe_vector_operand (op0, modev2);
28354 if (VECTOR_MODE_P (modev4))
28355 op2 = safe_vector_operand (op2, modev4);
28357 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
28358 op0 = copy_to_mode_reg (modev2, op0);
28359 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
28360 op1 = copy_to_mode_reg (modei3, op1);
28361 if ((optimize && !register_operand (op2, modev4))
28362 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
28363 op2 = copy_to_mode_reg (modev4, op2);
28364 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
28365 op3 = copy_to_mode_reg (modei5, op3);
28367 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
28369 error ("the fifth argument must be an 8-bit immediate");
28373 if (d->code == IX86_BUILTIN_PCMPESTRI128)
28375 if (optimize || !target
28376 || GET_MODE (target) != tmode0
28377 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
28378 target = gen_reg_rtx (tmode0);
28380 scratch1 = gen_reg_rtx (tmode1);
28382 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
28384 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
28386 if (optimize || !target
28387 || GET_MODE (target) != tmode1
28388 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
28389 target = gen_reg_rtx (tmode1);
28391 scratch0 = gen_reg_rtx (tmode0);
28393 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
28397 gcc_assert (d->flag);
28399 scratch0 = gen_reg_rtx (tmode0);
28400 scratch1 = gen_reg_rtx (tmode1);
28402 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
28412 target = gen_reg_rtx (SImode);
28413 emit_move_insn (target, const0_rtx);
28414 target = gen_rtx_SUBREG (QImode, target, 0);
28417 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28418 gen_rtx_fmt_ee (EQ, QImode,
28419 gen_rtx_REG ((enum machine_mode) d->flag,
28422 return SUBREG_REG (target);
28429 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
28432 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
28433 tree exp, rtx target)
28436 tree arg0 = CALL_EXPR_ARG (exp, 0);
28437 tree arg1 = CALL_EXPR_ARG (exp, 1);
28438 tree arg2 = CALL_EXPR_ARG (exp, 2);
28439 rtx scratch0, scratch1;
28440 rtx op0 = expand_normal (arg0);
28441 rtx op1 = expand_normal (arg1);
28442 rtx op2 = expand_normal (arg2);
28443 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
28445 tmode0 = insn_data[d->icode].operand[0].mode;
28446 tmode1 = insn_data[d->icode].operand[1].mode;
28447 modev2 = insn_data[d->icode].operand[2].mode;
28448 modev3 = insn_data[d->icode].operand[3].mode;
28449 modeimm = insn_data[d->icode].operand[4].mode;
28451 if (VECTOR_MODE_P (modev2))
28452 op0 = safe_vector_operand (op0, modev2);
28453 if (VECTOR_MODE_P (modev3))
28454 op1 = safe_vector_operand (op1, modev3);
28456 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
28457 op0 = copy_to_mode_reg (modev2, op0);
28458 if ((optimize && !register_operand (op1, modev3))
28459 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
28460 op1 = copy_to_mode_reg (modev3, op1);
28462 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
28464 error ("the third argument must be an 8-bit immediate");
28468 if (d->code == IX86_BUILTIN_PCMPISTRI128)
28470 if (optimize || !target
28471 || GET_MODE (target) != tmode0
28472 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
28473 target = gen_reg_rtx (tmode0);
28475 scratch1 = gen_reg_rtx (tmode1);
28477 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
28479 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
28481 if (optimize || !target
28482 || GET_MODE (target) != tmode1
28483 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
28484 target = gen_reg_rtx (tmode1);
28486 scratch0 = gen_reg_rtx (tmode0);
28488 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
28492 gcc_assert (d->flag);
28494 scratch0 = gen_reg_rtx (tmode0);
28495 scratch1 = gen_reg_rtx (tmode1);
28497 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
28507 target = gen_reg_rtx (SImode);
28508 emit_move_insn (target, const0_rtx);
28509 target = gen_rtx_SUBREG (QImode, target, 0);
28512 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28513 gen_rtx_fmt_ee (EQ, QImode,
28514 gen_rtx_REG ((enum machine_mode) d->flag,
28517 return SUBREG_REG (target);
28523 /* Subroutine of ix86_expand_builtin to take care of insns with
28524 variable number of operands. */
28527 ix86_expand_args_builtin (const struct builtin_description *d,
28528 tree exp, rtx target)
28530 rtx pat, real_target;
28531 unsigned int i, nargs;
28532 unsigned int nargs_constant = 0;
28533 int num_memory = 0;
28537 enum machine_mode mode;
28539 bool last_arg_count = false;
28540 enum insn_code icode = d->icode;
28541 const struct insn_data_d *insn_p = &insn_data[icode];
28542 enum machine_mode tmode = insn_p->operand[0].mode;
28543 enum machine_mode rmode = VOIDmode;
28545 enum rtx_code comparison = d->comparison;
28547 switch ((enum ix86_builtin_func_type) d->flag)
28549 case V2DF_FTYPE_V2DF_ROUND:
28550 case V4DF_FTYPE_V4DF_ROUND:
28551 case V4SF_FTYPE_V4SF_ROUND:
28552 case V8SF_FTYPE_V8SF_ROUND:
28553 case V4SI_FTYPE_V4SF_ROUND:
28554 case V8SI_FTYPE_V8SF_ROUND:
28555 return ix86_expand_sse_round (d, exp, target);
28556 case V4SI_FTYPE_V2DF_V2DF_ROUND:
28557 case V8SI_FTYPE_V4DF_V4DF_ROUND:
28558 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
28559 case INT_FTYPE_V8SF_V8SF_PTEST:
28560 case INT_FTYPE_V4DI_V4DI_PTEST:
28561 case INT_FTYPE_V4DF_V4DF_PTEST:
28562 case INT_FTYPE_V4SF_V4SF_PTEST:
28563 case INT_FTYPE_V2DI_V2DI_PTEST:
28564 case INT_FTYPE_V2DF_V2DF_PTEST:
28565 return ix86_expand_sse_ptest (d, exp, target);
28566 case FLOAT128_FTYPE_FLOAT128:
28567 case FLOAT_FTYPE_FLOAT:
28568 case INT_FTYPE_INT:
28569 case UINT64_FTYPE_INT:
28570 case UINT16_FTYPE_UINT16:
28571 case INT64_FTYPE_INT64:
28572 case INT64_FTYPE_V4SF:
28573 case INT64_FTYPE_V2DF:
28574 case INT_FTYPE_V16QI:
28575 case INT_FTYPE_V8QI:
28576 case INT_FTYPE_V8SF:
28577 case INT_FTYPE_V4DF:
28578 case INT_FTYPE_V4SF:
28579 case INT_FTYPE_V2DF:
28580 case INT_FTYPE_V32QI:
28581 case V16QI_FTYPE_V16QI:
28582 case V8SI_FTYPE_V8SF:
28583 case V8SI_FTYPE_V4SI:
28584 case V8HI_FTYPE_V8HI:
28585 case V8HI_FTYPE_V16QI:
28586 case V8QI_FTYPE_V8QI:
28587 case V8SF_FTYPE_V8SF:
28588 case V8SF_FTYPE_V8SI:
28589 case V8SF_FTYPE_V4SF:
28590 case V8SF_FTYPE_V8HI:
28591 case V4SI_FTYPE_V4SI:
28592 case V4SI_FTYPE_V16QI:
28593 case V4SI_FTYPE_V4SF:
28594 case V4SI_FTYPE_V8SI:
28595 case V4SI_FTYPE_V8HI:
28596 case V4SI_FTYPE_V4DF:
28597 case V4SI_FTYPE_V2DF:
28598 case V4HI_FTYPE_V4HI:
28599 case V4DF_FTYPE_V4DF:
28600 case V4DF_FTYPE_V4SI:
28601 case V4DF_FTYPE_V4SF:
28602 case V4DF_FTYPE_V2DF:
28603 case V4SF_FTYPE_V4SF:
28604 case V4SF_FTYPE_V4SI:
28605 case V4SF_FTYPE_V8SF:
28606 case V4SF_FTYPE_V4DF:
28607 case V4SF_FTYPE_V8HI:
28608 case V4SF_FTYPE_V2DF:
28609 case V2DI_FTYPE_V2DI:
28610 case V2DI_FTYPE_V16QI:
28611 case V2DI_FTYPE_V8HI:
28612 case V2DI_FTYPE_V4SI:
28613 case V2DF_FTYPE_V2DF:
28614 case V2DF_FTYPE_V4SI:
28615 case V2DF_FTYPE_V4DF:
28616 case V2DF_FTYPE_V4SF:
28617 case V2DF_FTYPE_V2SI:
28618 case V2SI_FTYPE_V2SI:
28619 case V2SI_FTYPE_V4SF:
28620 case V2SI_FTYPE_V2SF:
28621 case V2SI_FTYPE_V2DF:
28622 case V2SF_FTYPE_V2SF:
28623 case V2SF_FTYPE_V2SI:
28624 case V32QI_FTYPE_V32QI:
28625 case V32QI_FTYPE_V16QI:
28626 case V16HI_FTYPE_V16HI:
28627 case V16HI_FTYPE_V8HI:
28628 case V8SI_FTYPE_V8SI:
28629 case V16HI_FTYPE_V16QI:
28630 case V8SI_FTYPE_V16QI:
28631 case V4DI_FTYPE_V16QI:
28632 case V8SI_FTYPE_V8HI:
28633 case V4DI_FTYPE_V8HI:
28634 case V4DI_FTYPE_V4SI:
28635 case V4DI_FTYPE_V2DI:
28638 case V4SF_FTYPE_V4SF_VEC_MERGE:
28639 case V2DF_FTYPE_V2DF_VEC_MERGE:
28640 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
28641 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
28642 case V16QI_FTYPE_V16QI_V16QI:
28643 case V16QI_FTYPE_V8HI_V8HI:
28644 case V8QI_FTYPE_V8QI_V8QI:
28645 case V8QI_FTYPE_V4HI_V4HI:
28646 case V8HI_FTYPE_V8HI_V8HI:
28647 case V8HI_FTYPE_V16QI_V16QI:
28648 case V8HI_FTYPE_V4SI_V4SI:
28649 case V8SF_FTYPE_V8SF_V8SF:
28650 case V8SF_FTYPE_V8SF_V8SI:
28651 case V4SI_FTYPE_V4SI_V4SI:
28652 case V4SI_FTYPE_V8HI_V8HI:
28653 case V4SI_FTYPE_V4SF_V4SF:
28654 case V4SI_FTYPE_V2DF_V2DF:
28655 case V4HI_FTYPE_V4HI_V4HI:
28656 case V4HI_FTYPE_V8QI_V8QI:
28657 case V4HI_FTYPE_V2SI_V2SI:
28658 case V4DF_FTYPE_V4DF_V4DF:
28659 case V4DF_FTYPE_V4DF_V4DI:
28660 case V4SF_FTYPE_V4SF_V4SF:
28661 case V4SF_FTYPE_V4SF_V4SI:
28662 case V4SF_FTYPE_V4SF_V2SI:
28663 case V4SF_FTYPE_V4SF_V2DF:
28664 case V4SF_FTYPE_V4SF_DI:
28665 case V4SF_FTYPE_V4SF_SI:
28666 case V2DI_FTYPE_V2DI_V2DI:
28667 case V2DI_FTYPE_V16QI_V16QI:
28668 case V2DI_FTYPE_V4SI_V4SI:
28669 case V2DI_FTYPE_V2DI_V16QI:
28670 case V2DI_FTYPE_V2DF_V2DF:
28671 case V2SI_FTYPE_V2SI_V2SI:
28672 case V2SI_FTYPE_V4HI_V4HI:
28673 case V2SI_FTYPE_V2SF_V2SF:
28674 case V2DF_FTYPE_V2DF_V2DF:
28675 case V2DF_FTYPE_V2DF_V4SF:
28676 case V2DF_FTYPE_V2DF_V2DI:
28677 case V2DF_FTYPE_V2DF_DI:
28678 case V2DF_FTYPE_V2DF_SI:
28679 case V2SF_FTYPE_V2SF_V2SF:
28680 case V1DI_FTYPE_V1DI_V1DI:
28681 case V1DI_FTYPE_V8QI_V8QI:
28682 case V1DI_FTYPE_V2SI_V2SI:
28683 case V32QI_FTYPE_V16HI_V16HI:
28684 case V16HI_FTYPE_V8SI_V8SI:
28685 case V32QI_FTYPE_V32QI_V32QI:
28686 case V16HI_FTYPE_V32QI_V32QI:
28687 case V16HI_FTYPE_V16HI_V16HI:
28688 case V8SI_FTYPE_V4DF_V4DF:
28689 case V8SI_FTYPE_V8SI_V8SI:
28690 case V8SI_FTYPE_V16HI_V16HI:
28691 case V4DI_FTYPE_V4DI_V4DI:
28692 case V4DI_FTYPE_V8SI_V8SI:
28693 if (comparison == UNKNOWN)
28694 return ix86_expand_binop_builtin (icode, exp, target);
28697 case V4SF_FTYPE_V4SF_V4SF_SWAP:
28698 case V2DF_FTYPE_V2DF_V2DF_SWAP:
28699 gcc_assert (comparison != UNKNOWN);
28703 case V16HI_FTYPE_V16HI_V8HI_COUNT:
28704 case V16HI_FTYPE_V16HI_SI_COUNT:
28705 case V8SI_FTYPE_V8SI_V4SI_COUNT:
28706 case V8SI_FTYPE_V8SI_SI_COUNT:
28707 case V4DI_FTYPE_V4DI_V2DI_COUNT:
28708 case V4DI_FTYPE_V4DI_INT_COUNT:
28709 case V8HI_FTYPE_V8HI_V8HI_COUNT:
28710 case V8HI_FTYPE_V8HI_SI_COUNT:
28711 case V4SI_FTYPE_V4SI_V4SI_COUNT:
28712 case V4SI_FTYPE_V4SI_SI_COUNT:
28713 case V4HI_FTYPE_V4HI_V4HI_COUNT:
28714 case V4HI_FTYPE_V4HI_SI_COUNT:
28715 case V2DI_FTYPE_V2DI_V2DI_COUNT:
28716 case V2DI_FTYPE_V2DI_SI_COUNT:
28717 case V2SI_FTYPE_V2SI_V2SI_COUNT:
28718 case V2SI_FTYPE_V2SI_SI_COUNT:
28719 case V1DI_FTYPE_V1DI_V1DI_COUNT:
28720 case V1DI_FTYPE_V1DI_SI_COUNT:
28722 last_arg_count = true;
28724 case UINT64_FTYPE_UINT64_UINT64:
28725 case UINT_FTYPE_UINT_UINT:
28726 case UINT_FTYPE_UINT_USHORT:
28727 case UINT_FTYPE_UINT_UCHAR:
28728 case UINT16_FTYPE_UINT16_INT:
28729 case UINT8_FTYPE_UINT8_INT:
28732 case V2DI_FTYPE_V2DI_INT_CONVERT:
28735 nargs_constant = 1;
28737 case V4DI_FTYPE_V4DI_INT_CONVERT:
28740 nargs_constant = 1;
28742 case V8HI_FTYPE_V8HI_INT:
28743 case V8HI_FTYPE_V8SF_INT:
28744 case V8HI_FTYPE_V4SF_INT:
28745 case V8SF_FTYPE_V8SF_INT:
28746 case V4SI_FTYPE_V4SI_INT:
28747 case V4SI_FTYPE_V8SI_INT:
28748 case V4HI_FTYPE_V4HI_INT:
28749 case V4DF_FTYPE_V4DF_INT:
28750 case V4SF_FTYPE_V4SF_INT:
28751 case V4SF_FTYPE_V8SF_INT:
28752 case V2DI_FTYPE_V2DI_INT:
28753 case V2DF_FTYPE_V2DF_INT:
28754 case V2DF_FTYPE_V4DF_INT:
28755 case V16HI_FTYPE_V16HI_INT:
28756 case V8SI_FTYPE_V8SI_INT:
28757 case V4DI_FTYPE_V4DI_INT:
28758 case V2DI_FTYPE_V4DI_INT:
28760 nargs_constant = 1;
28762 case V16QI_FTYPE_V16QI_V16QI_V16QI:
28763 case V8SF_FTYPE_V8SF_V8SF_V8SF:
28764 case V4DF_FTYPE_V4DF_V4DF_V4DF:
28765 case V4SF_FTYPE_V4SF_V4SF_V4SF:
28766 case V2DF_FTYPE_V2DF_V2DF_V2DF:
28767 case V32QI_FTYPE_V32QI_V32QI_V32QI:
28770 case V32QI_FTYPE_V32QI_V32QI_INT:
28771 case V16HI_FTYPE_V16HI_V16HI_INT:
28772 case V16QI_FTYPE_V16QI_V16QI_INT:
28773 case V4DI_FTYPE_V4DI_V4DI_INT:
28774 case V8HI_FTYPE_V8HI_V8HI_INT:
28775 case V8SI_FTYPE_V8SI_V8SI_INT:
28776 case V8SI_FTYPE_V8SI_V4SI_INT:
28777 case V8SF_FTYPE_V8SF_V8SF_INT:
28778 case V8SF_FTYPE_V8SF_V4SF_INT:
28779 case V4SI_FTYPE_V4SI_V4SI_INT:
28780 case V4DF_FTYPE_V4DF_V4DF_INT:
28781 case V4DF_FTYPE_V4DF_V2DF_INT:
28782 case V4SF_FTYPE_V4SF_V4SF_INT:
28783 case V2DI_FTYPE_V2DI_V2DI_INT:
28784 case V4DI_FTYPE_V4DI_V2DI_INT:
28785 case V2DF_FTYPE_V2DF_V2DF_INT:
28787 nargs_constant = 1;
28789 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
28792 nargs_constant = 1;
28794 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
28797 nargs_constant = 1;
28799 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
28802 nargs_constant = 1;
28804 case V2DI_FTYPE_V2DI_UINT_UINT:
28806 nargs_constant = 2;
28808 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
28809 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
28810 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
28811 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
28813 nargs_constant = 1;
28815 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
28817 nargs_constant = 2;
28820 gcc_unreachable ();
28823 gcc_assert (nargs <= ARRAY_SIZE (args));
28825 if (comparison != UNKNOWN)
28827 gcc_assert (nargs == 2);
28828 return ix86_expand_sse_compare (d, exp, target, swap);
28831 if (rmode == VOIDmode || rmode == tmode)
28835 || GET_MODE (target) != tmode
28836 || !insn_p->operand[0].predicate (target, tmode))
28837 target = gen_reg_rtx (tmode);
28838 real_target = target;
28842 target = gen_reg_rtx (rmode);
28843 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
28846 for (i = 0; i < nargs; i++)
28848 tree arg = CALL_EXPR_ARG (exp, i);
28849 rtx op = expand_normal (arg);
28850 enum machine_mode mode = insn_p->operand[i + 1].mode;
28851 bool match = insn_p->operand[i + 1].predicate (op, mode);
28853 if (last_arg_count && (i + 1) == nargs)
28855 /* SIMD shift insns take either an 8-bit immediate or a
28856 register as the count. But the builtin functions take an
28857 int as the count. If the count doesn't match, we put it in a register. */
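/* Illustrative example (editor's note, not part of the original source):

     __m128i a = _mm_slli_epi32 (x, 3);   the count folds to a CONST_INT
     __m128i b = _mm_slli_epi32 (x, n);   run-time int count

   The first call matches the operand predicate directly; the second
   reaches the code below, which narrows the count to SImode and, if
   it still does not match, copies it into a register. */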
28860 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
28861 if (!insn_p->operand[i + 1].predicate (op, mode))
28862 op = copy_to_reg (op);
28865 else if ((nargs - i) <= nargs_constant)
28870 case CODE_FOR_avx2_inserti128:
28871 case CODE_FOR_avx2_extracti128:
28872 error ("the last argument must be a 1-bit immediate");
28875 case CODE_FOR_sse4_1_roundsd:
28876 case CODE_FOR_sse4_1_roundss:
28878 case CODE_FOR_sse4_1_roundpd:
28879 case CODE_FOR_sse4_1_roundps:
28880 case CODE_FOR_avx_roundpd256:
28881 case CODE_FOR_avx_roundps256:
28883 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
28884 case CODE_FOR_sse4_1_roundps_sfix:
28885 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
28886 case CODE_FOR_avx_roundps_sfix256:
28888 case CODE_FOR_sse4_1_blendps:
28889 case CODE_FOR_avx_blendpd256:
28890 case CODE_FOR_avx_vpermilv4df:
28891 error ("the last argument must be a 4-bit immediate");
28894 case CODE_FOR_sse4_1_blendpd:
28895 case CODE_FOR_avx_vpermilv2df:
28896 case CODE_FOR_xop_vpermil2v2df3:
28897 case CODE_FOR_xop_vpermil2v4sf3:
28898 case CODE_FOR_xop_vpermil2v4df3:
28899 case CODE_FOR_xop_vpermil2v8sf3:
28900 error ("the last argument must be a 2-bit immediate");
28903 case CODE_FOR_avx_vextractf128v4df:
28904 case CODE_FOR_avx_vextractf128v8sf:
28905 case CODE_FOR_avx_vextractf128v8si:
28906 case CODE_FOR_avx_vinsertf128v4df:
28907 case CODE_FOR_avx_vinsertf128v8sf:
28908 case CODE_FOR_avx_vinsertf128v8si:
28909 error ("the last argument must be a 1-bit immediate");
28912 case CODE_FOR_avx_vmcmpv2df3:
28913 case CODE_FOR_avx_vmcmpv4sf3:
28914 case CODE_FOR_avx_cmpv2df3:
28915 case CODE_FOR_avx_cmpv4sf3:
28916 case CODE_FOR_avx_cmpv4df3:
28917 case CODE_FOR_avx_cmpv8sf3:
28918 error ("the last argument must be a 5-bit immediate");
28922 switch (nargs_constant)
28925 if ((nargs - i) == nargs_constant)
28927 error ("the next to last argument must be an 8-bit immediate");
28931 error ("the last argument must be an 8-bit immediate");
28934 gcc_unreachable ();
28941 if (VECTOR_MODE_P (mode))
28942 op = safe_vector_operand (op, mode);
28944 /* If we aren't optimizing, only allow one memory operand to be generated. */
28946 if (memory_operand (op, mode))
28949 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
28951 if (optimize || !match || num_memory > 1)
28952 op = copy_to_mode_reg (mode, op);
28956 op = copy_to_reg (op);
28957 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
28962 args[i].mode = mode;
28968 pat = GEN_FCN (icode) (real_target, args[0].op);
28971 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
28974 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
28978 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
28979 args[2].op, args[3].op);
28982 gcc_unreachable ();
28992 /* Subroutine of ix86_expand_builtin to take care of special insns
28993 with variable number of operands. */
28996 ix86_expand_special_args_builtin (const struct builtin_description *d,
28997 tree exp, rtx target)
29001 unsigned int i, nargs, arg_adjust, memory;
29005 enum machine_mode mode;
29007 enum insn_code icode = d->icode;
29008 bool last_arg_constant = false;
29009 const struct insn_data_d *insn_p = &insn_data[icode];
29010 enum machine_mode tmode = insn_p->operand[0].mode;
29011 enum { load, store } klass;
29013 switch ((enum ix86_builtin_func_type) d->flag)
29015 case VOID_FTYPE_VOID:
29016 if (icode == CODE_FOR_avx_vzeroupper)
29017 target = GEN_INT (vzeroupper_intrinsic);
29018 emit_insn (GEN_FCN (icode) (target));
29020 case VOID_FTYPE_UINT64:
29021 case VOID_FTYPE_UNSIGNED:
29026 case UINT64_FTYPE_VOID:
29027 case UNSIGNED_FTYPE_VOID:
29032 case UINT64_FTYPE_PUNSIGNED:
29033 case V2DI_FTYPE_PV2DI:
29034 case V4DI_FTYPE_PV4DI:
29035 case V32QI_FTYPE_PCCHAR:
29036 case V16QI_FTYPE_PCCHAR:
29037 case V8SF_FTYPE_PCV4SF:
29038 case V8SF_FTYPE_PCFLOAT:
29039 case V4SF_FTYPE_PCFLOAT:
29040 case V4DF_FTYPE_PCV2DF:
29041 case V4DF_FTYPE_PCDOUBLE:
29042 case V2DF_FTYPE_PCDOUBLE:
29043 case VOID_FTYPE_PVOID:
29048 case VOID_FTYPE_PV2SF_V4SF:
29049 case VOID_FTYPE_PV4DI_V4DI:
29050 case VOID_FTYPE_PV2DI_V2DI:
29051 case VOID_FTYPE_PCHAR_V32QI:
29052 case VOID_FTYPE_PCHAR_V16QI:
29053 case VOID_FTYPE_PFLOAT_V8SF:
29054 case VOID_FTYPE_PFLOAT_V4SF:
29055 case VOID_FTYPE_PDOUBLE_V4DF:
29056 case VOID_FTYPE_PDOUBLE_V2DF:
29057 case VOID_FTYPE_PLONGLONG_LONGLONG:
29058 case VOID_FTYPE_PULONGLONG_ULONGLONG:
29059 case VOID_FTYPE_PINT_INT:
29062 /* Reserve memory operand for target. */
29063 memory = ARRAY_SIZE (args);
29065 case V4SF_FTYPE_V4SF_PCV2SF:
29066 case V2DF_FTYPE_V2DF_PCDOUBLE:
29071 case V8SF_FTYPE_PCV8SF_V8SI:
29072 case V4DF_FTYPE_PCV4DF_V4DI:
29073 case V4SF_FTYPE_PCV4SF_V4SI:
29074 case V2DF_FTYPE_PCV2DF_V2DI:
29075 case V8SI_FTYPE_PCV8SI_V8SI:
29076 case V4DI_FTYPE_PCV4DI_V4DI:
29077 case V4SI_FTYPE_PCV4SI_V4SI:
29078 case V2DI_FTYPE_PCV2DI_V2DI:
29083 case VOID_FTYPE_PV8SF_V8SI_V8SF:
29084 case VOID_FTYPE_PV4DF_V4DI_V4DF:
29085 case VOID_FTYPE_PV4SF_V4SI_V4SF:
29086 case VOID_FTYPE_PV2DF_V2DI_V2DF:
29087 case VOID_FTYPE_PV8SI_V8SI_V8SI:
29088 case VOID_FTYPE_PV4DI_V4DI_V4DI:
29089 case VOID_FTYPE_PV4SI_V4SI_V4SI:
29090 case VOID_FTYPE_PV2DI_V2DI_V2DI:
29093 /* Reserve memory operand for target. */
29094 memory = ARRAY_SIZE (args);
29096 case VOID_FTYPE_UINT_UINT_UINT:
29097 case VOID_FTYPE_UINT64_UINT_UINT:
29098 case UCHAR_FTYPE_UINT_UINT_UINT:
29099 case UCHAR_FTYPE_UINT64_UINT_UINT:
29102 memory = ARRAY_SIZE (args);
29103 last_arg_constant = true;
29106 gcc_unreachable ();
29109 gcc_assert (nargs <= ARRAY_SIZE (args));
29111 if (klass == store)
29113 arg = CALL_EXPR_ARG (exp, 0);
29114 op = expand_normal (arg);
29115 gcc_assert (target == 0);
29118 if (GET_MODE (op) != Pmode)
29119 op = convert_to_mode (Pmode, op, 1);
29120 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
29123 target = force_reg (tmode, op);
29131 || !register_operand (target, tmode)
29132 || GET_MODE (target) != tmode)
29133 target = gen_reg_rtx (tmode);
29136 for (i = 0; i < nargs; i++)
29138 enum machine_mode mode = insn_p->operand[i + 1].mode;
29141 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
29142 op = expand_normal (arg);
29143 match = insn_p->operand[i + 1].predicate (op, mode);
29145 if (last_arg_constant && (i + 1) == nargs)
29149 if (icode == CODE_FOR_lwp_lwpvalsi3
29150 || icode == CODE_FOR_lwp_lwpinssi3
29151 || icode == CODE_FOR_lwp_lwpvaldi3
29152 || icode == CODE_FOR_lwp_lwpinsdi3)
29153 error ("the last argument must be a 32-bit immediate");
29155 error ("the last argument must be an 8-bit immediate");
29163 /* This must be the memory operand. */
29164 if (GET_MODE (op) != Pmode)
29165 op = convert_to_mode (Pmode, op, 1);
29166 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
29167 gcc_assert (GET_MODE (op) == mode
29168 || GET_MODE (op) == VOIDmode);
29172 /* This must be a register. */
29173 if (VECTOR_MODE_P (mode))
29174 op = safe_vector_operand (op, mode);
29176 gcc_assert (GET_MODE (op) == mode
29177 || GET_MODE (op) == VOIDmode);
29178 op = copy_to_mode_reg (mode, op);
29183 args[i].mode = mode;
29189 pat = GEN_FCN (icode) (target);
29192 pat = GEN_FCN (icode) (target, args[0].op);
29195 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
29198 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
29201 gcc_unreachable ();
29207 return klass == store ? 0 : target;
29210 /* Return the integer constant in ARG. Constrain it to be in the range
29211 of the subparts of VEC_TYPE; issue an error if not. */
29214 get_element_number (tree vec_type, tree arg)
29216 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
29218 if (!host_integerp (arg, 1)
29219 || (elt = tree_low_cst (arg, 1), elt > max))
29221 error ("selector must be an integer constant in the range 0..%wi", max);
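/* For instance (illustrative), a call such as
   __builtin_ia32_vec_ext_v4sf (x, 4) is rejected by the check above,
   since a V4SF vector only has subparts 0..3. */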
29228 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29229 ix86_expand_vector_init. We DO have language-level syntax for this, in
29230 the form of (type){ init-list }. Except that since we can't place emms
29231 instructions from inside the compiler, we can't allow the use of MMX
29232 registers unless the user explicitly asks for it. So we do *not* define
29233 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
29234 we have builtins invoked by mmintrin.h that give us license to emit
29235 these sorts of instructions. */
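/* Illustrative use, assuming the usual mmintrin.h definition:

     _mm_set_pi32 (i1, i0)
       => (__m64) __builtin_ia32_vec_init_v2si (i0, i1);

   which arrives here as a two-argument call expanded below. */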
29238 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
29240 enum machine_mode tmode = TYPE_MODE (type);
29241 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
29242 int i, n_elt = GET_MODE_NUNITS (tmode);
29243 rtvec v = rtvec_alloc (n_elt);
29245 gcc_assert (VECTOR_MODE_P (tmode));
29246 gcc_assert (call_expr_nargs (exp) == n_elt);
29248 for (i = 0; i < n_elt; ++i)
29250 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
29251 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
29254 if (!target || !register_operand (target, tmode))
29255 target = gen_reg_rtx (tmode);
29257 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
29261 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29262 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
29263 had a language-level syntax for referencing vector elements. */
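/* Illustrative use (assumed): _mm_extract_pi16 (a, n) expands to
   __builtin_ia32_vec_ext_v4hi ((__v4hi) a, n) and is handled
   here. */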
29266 ix86_expand_vec_ext_builtin (tree exp, rtx target)
29268 enum machine_mode tmode, mode0;
29273 arg0 = CALL_EXPR_ARG (exp, 0);
29274 arg1 = CALL_EXPR_ARG (exp, 1);
29276 op0 = expand_normal (arg0);
29277 elt = get_element_number (TREE_TYPE (arg0), arg1);
29279 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
29280 mode0 = TYPE_MODE (TREE_TYPE (arg0));
29281 gcc_assert (VECTOR_MODE_P (mode0));
29283 op0 = force_reg (mode0, op0);
29285 if (optimize || !target || !register_operand (target, tmode))
29286 target = gen_reg_rtx (tmode);
29288 ix86_expand_vector_extract (true, target, op0, elt);
29293 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29294 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
29295 a language-level syntax for referencing vector elements. */
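/* Illustrative use (assumed): _mm_insert_pi16 (a, d, n) expands to
   (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi) a, d, n) and is
   handled here. */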
29298 ix86_expand_vec_set_builtin (tree exp)
29300 enum machine_mode tmode, mode1;
29301 tree arg0, arg1, arg2;
29303 rtx op0, op1, target;
29305 arg0 = CALL_EXPR_ARG (exp, 0);
29306 arg1 = CALL_EXPR_ARG (exp, 1);
29307 arg2 = CALL_EXPR_ARG (exp, 2);
29309 tmode = TYPE_MODE (TREE_TYPE (arg0));
29310 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
29311 gcc_assert (VECTOR_MODE_P (tmode));
29313 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
29314 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
29315 elt = get_element_number (TREE_TYPE (arg0), arg2);
29317 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
29318 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
29320 op0 = force_reg (tmode, op0);
29321 op1 = force_reg (mode1, op1);
29323 /* OP0 is the source of these builtin functions and shouldn't be
29324 modified. Create a copy, use it and return it as target. */
29325 target = gen_reg_rtx (tmode);
29326 emit_move_insn (target, op0);
29327 ix86_expand_vector_set (true, target, op1, elt);
29332 /* Expand an expression EXP that calls a built-in function,
29333 with result going to TARGET if that's convenient
29334 (and in mode MODE if that's convenient).
29335 SUBTARGET may be used as the target for computing one of EXP's operands.
29336 IGNORE is nonzero if the value is to be ignored. */
29339 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
29340 enum machine_mode mode ATTRIBUTE_UNUSED,
29341 int ignore ATTRIBUTE_UNUSED)
29343 const struct builtin_description *d;
29345 enum insn_code icode;
29346 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
29347 tree arg0, arg1, arg2, arg3, arg4;
29348 rtx op0, op1, op2, op3, op4, pat;
29349 enum machine_mode mode0, mode1, mode2, mode3, mode4;
29350 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
29352 /* Determine whether the builtin function is available under the current ISA.
29353 Originally the builtin was not created if it wasn't applicable to the
29354 current ISA based on the command-line switches. With function-specific
29355 options, we need to check in the context of the function making the call
29356 whether it is supported. */
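/* E.g. (illustrative): when compiling with plain -msse2, a function
   declared __attribute__((target ("avx"))) may legitimately call an
   AVX builtin; the test below therefore checks the builtin's ISA
   mask against the flags in effect for the current function, not
   just the command line. */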
29357 if (ix86_builtins_isa[fcode].isa
29358 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
29360 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
29361 NULL, (enum fpmath_unit) 0, false);
29364 error ("%qE needs unknown isa option", fndecl);
29367 gcc_assert (opts != NULL);
29368 error ("%qE needs isa option %s", fndecl, opts);
29376 case IX86_BUILTIN_MASKMOVQ:
29377 case IX86_BUILTIN_MASKMOVDQU:
29378 icode = (fcode == IX86_BUILTIN_MASKMOVQ
29379 ? CODE_FOR_mmx_maskmovq
29380 : CODE_FOR_sse2_maskmovdqu);
29381 /* Note the arg order is different from the operand order. */
29382 arg1 = CALL_EXPR_ARG (exp, 0);
29383 arg2 = CALL_EXPR_ARG (exp, 1);
29384 arg0 = CALL_EXPR_ARG (exp, 2);
29385 op0 = expand_normal (arg0);
29386 op1 = expand_normal (arg1);
29387 op2 = expand_normal (arg2);
29388 mode0 = insn_data[icode].operand[0].mode;
29389 mode1 = insn_data[icode].operand[1].mode;
29390 mode2 = insn_data[icode].operand[2].mode;
29392 if (GET_MODE (op0) != Pmode)
29393 op0 = convert_to_mode (Pmode, op0, 1);
29394 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
29396 if (!insn_data[icode].operand[0].predicate (op0, mode0))
29397 op0 = copy_to_mode_reg (mode0, op0);
29398 if (!insn_data[icode].operand[1].predicate (op1, mode1))
29399 op1 = copy_to_mode_reg (mode1, op1);
29400 if (!insn_data[icode].operand[2].predicate (op2, mode2))
29401 op2 = copy_to_mode_reg (mode2, op2);
29402 pat = GEN_FCN (icode) (op0, op1, op2);
29408 case IX86_BUILTIN_LDMXCSR:
29409 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
29410 target = assign_386_stack_local (SImode, SLOT_TEMP);
29411 emit_move_insn (target, op0);
29412 emit_insn (gen_sse_ldmxcsr (target));
29415 case IX86_BUILTIN_STMXCSR:
29416 target = assign_386_stack_local (SImode, SLOT_TEMP);
29417 emit_insn (gen_sse_stmxcsr (target));
29418 return copy_to_mode_reg (SImode, target);
29420 case IX86_BUILTIN_CLFLUSH:
29421 arg0 = CALL_EXPR_ARG (exp, 0);
29422 op0 = expand_normal (arg0);
29423 icode = CODE_FOR_sse2_clflush;
29424 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
29426 if (GET_MODE (op0) != Pmode)
29427 op0 = convert_to_mode (Pmode, op0, 1);
29428 op0 = force_reg (Pmode, op0);
29431 emit_insn (gen_sse2_clflush (op0));
29434 case IX86_BUILTIN_MONITOR:
29435 arg0 = CALL_EXPR_ARG (exp, 0);
29436 arg1 = CALL_EXPR_ARG (exp, 1);
29437 arg2 = CALL_EXPR_ARG (exp, 2);
29438 op0 = expand_normal (arg0);
29439 op1 = expand_normal (arg1);
29440 op2 = expand_normal (arg2);
29443 if (GET_MODE (op0) != Pmode)
29444 op0 = convert_to_mode (Pmode, op0, 1);
29445 op0 = force_reg (Pmode, op0);
29448 op1 = copy_to_mode_reg (SImode, op1);
29450 op2 = copy_to_mode_reg (SImode, op2);
29451 emit_insn (ix86_gen_monitor (op0, op1, op2));
29454 case IX86_BUILTIN_MWAIT:
29455 arg0 = CALL_EXPR_ARG (exp, 0);
29456 arg1 = CALL_EXPR_ARG (exp, 1);
29457 op0 = expand_normal (arg0);
29458 op1 = expand_normal (arg1);
29460 op0 = copy_to_mode_reg (SImode, op0);
29462 op1 = copy_to_mode_reg (SImode, op1);
29463 emit_insn (gen_sse3_mwait (op0, op1));
29466 case IX86_BUILTIN_VEC_INIT_V2SI:
29467 case IX86_BUILTIN_VEC_INIT_V4HI:
29468 case IX86_BUILTIN_VEC_INIT_V8QI:
29469 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
29471 case IX86_BUILTIN_VEC_EXT_V2DF:
29472 case IX86_BUILTIN_VEC_EXT_V2DI:
29473 case IX86_BUILTIN_VEC_EXT_V4SF:
29474 case IX86_BUILTIN_VEC_EXT_V4SI:
29475 case IX86_BUILTIN_VEC_EXT_V8HI:
29476 case IX86_BUILTIN_VEC_EXT_V2SI:
29477 case IX86_BUILTIN_VEC_EXT_V4HI:
29478 case IX86_BUILTIN_VEC_EXT_V16QI:
29479 return ix86_expand_vec_ext_builtin (exp, target);
29481 case IX86_BUILTIN_VEC_SET_V2DI:
29482 case IX86_BUILTIN_VEC_SET_V4SF:
29483 case IX86_BUILTIN_VEC_SET_V4SI:
29484 case IX86_BUILTIN_VEC_SET_V8HI:
29485 case IX86_BUILTIN_VEC_SET_V4HI:
29486 case IX86_BUILTIN_VEC_SET_V16QI:
29487 return ix86_expand_vec_set_builtin (exp);
29489 case IX86_BUILTIN_INFQ:
29490 case IX86_BUILTIN_HUGE_VALQ:
29492 REAL_VALUE_TYPE inf;
29496 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
29498 tmp = validize_mem (force_const_mem (mode, tmp));
29501 target = gen_reg_rtx (mode);
29503 emit_move_insn (target, tmp);
29507 case IX86_BUILTIN_LLWPCB:
29508 arg0 = CALL_EXPR_ARG (exp, 0);
29509 op0 = expand_normal (arg0);
29510 icode = CODE_FOR_lwp_llwpcb;
29511 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
29513 if (GET_MODE (op0) != Pmode)
29514 op0 = convert_to_mode (Pmode, op0, 1);
29515 op0 = force_reg (Pmode, op0);
29517 emit_insn (gen_lwp_llwpcb (op0));
29520 case IX86_BUILTIN_SLWPCB:
29521 icode = CODE_FOR_lwp_slwpcb;
29523 || !insn_data[icode].operand[0].predicate (target, Pmode))
29524 target = gen_reg_rtx (Pmode);
29525 emit_insn (gen_lwp_slwpcb (target));
29528 case IX86_BUILTIN_BEXTRI32:
29529 case IX86_BUILTIN_BEXTRI64:
29530 arg0 = CALL_EXPR_ARG (exp, 0);
29531 arg1 = CALL_EXPR_ARG (exp, 1);
29532 op0 = expand_normal (arg0);
29533 op1 = expand_normal (arg1);
29534 icode = (fcode == IX86_BUILTIN_BEXTRI32
29535 ? CODE_FOR_tbm_bextri_si
29536 : CODE_FOR_tbm_bextri_di);
29537 if (!CONST_INT_P (op1))
29539 error ("last argument must be an immediate");
29544 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
29545 unsigned char lsb_index = INTVAL (op1) & 0xFF;
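/* E.g. (illustrative): a control immediate of 0x0408 yields
   length == 4 and lsb_index == 8, i.e. extract bits 8..11. */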
29546 op1 = GEN_INT (length);
29547 op2 = GEN_INT (lsb_index);
29548 pat = GEN_FCN (icode) (target, op0, op1, op2);
29554 case IX86_BUILTIN_RDRAND16_STEP:
29555 icode = CODE_FOR_rdrandhi_1;
29559 case IX86_BUILTIN_RDRAND32_STEP:
29560 icode = CODE_FOR_rdrandsi_1;
29564 case IX86_BUILTIN_RDRAND64_STEP:
29565 icode = CODE_FOR_rdranddi_1;
29569 op0 = gen_reg_rtx (mode0);
29570 emit_insn (GEN_FCN (icode) (op0));
29572 arg0 = CALL_EXPR_ARG (exp, 0);
29573 op1 = expand_normal (arg0);
29574 if (!address_operand (op1, VOIDmode))
29576 op1 = convert_memory_address (Pmode, op1);
29577 op1 = copy_addr_to_reg (op1);
29579 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
29581 op1 = gen_reg_rtx (SImode);
29582 emit_move_insn (op1, CONST1_RTX (SImode));
29584 /* Emit SImode conditional move. */
29585 if (mode0 == HImode)
29587 op2 = gen_reg_rtx (SImode);
29588 emit_insn (gen_zero_extendhisi2 (op2, op0));
29590 else if (mode0 == SImode)
29593 op2 = gen_rtx_SUBREG (SImode, op0, 0);
29596 target = gen_reg_rtx (SImode);
29598 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
29600 emit_insn (gen_rtx_SET (VOIDmode, target,
29601 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
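/* Net effect, in illustrative user code (assuming immintrin.h):

     unsigned int v;
     if (_rdrand32_step (&v))
       use (v);

   rdrand reports success in the carry flag; the flag test and
   conditional move above turn that flag into the intrinsic's int
   return value. */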
29604 case IX86_BUILTIN_GATHERSIV2DF:
29605 icode = CODE_FOR_avx2_gathersiv2df;
29607 case IX86_BUILTIN_GATHERSIV4DF:
29608 icode = CODE_FOR_avx2_gathersiv4df;
29610 case IX86_BUILTIN_GATHERDIV2DF:
29611 icode = CODE_FOR_avx2_gatherdiv2df;
29613 case IX86_BUILTIN_GATHERDIV4DF:
29614 icode = CODE_FOR_avx2_gatherdiv4df;
29616 case IX86_BUILTIN_GATHERSIV4SF:
29617 icode = CODE_FOR_avx2_gathersiv4sf;
29619 case IX86_BUILTIN_GATHERSIV8SF:
29620 icode = CODE_FOR_avx2_gathersiv8sf;
29622 case IX86_BUILTIN_GATHERDIV4SF:
29623 icode = CODE_FOR_avx2_gatherdiv4sf;
29625 case IX86_BUILTIN_GATHERDIV8SF:
29626 icode = CODE_FOR_avx2_gatherdiv8sf;
29628 case IX86_BUILTIN_GATHERSIV2DI:
29629 icode = CODE_FOR_avx2_gathersiv2di;
29631 case IX86_BUILTIN_GATHERSIV4DI:
29632 icode = CODE_FOR_avx2_gathersiv4di;
29634 case IX86_BUILTIN_GATHERDIV2DI:
29635 icode = CODE_FOR_avx2_gatherdiv2di;
29637 case IX86_BUILTIN_GATHERDIV4DI:
29638 icode = CODE_FOR_avx2_gatherdiv4di;
29640 case IX86_BUILTIN_GATHERSIV4SI:
29641 icode = CODE_FOR_avx2_gathersiv4si;
29643 case IX86_BUILTIN_GATHERSIV8SI:
29644 icode = CODE_FOR_avx2_gathersiv8si;
29646 case IX86_BUILTIN_GATHERDIV4SI:
29647 icode = CODE_FOR_avx2_gatherdiv4si;
29649 case IX86_BUILTIN_GATHERDIV8SI:
29650 icode = CODE_FOR_avx2_gatherdiv8si;
29652 case IX86_BUILTIN_GATHERALTSIV4DF:
29653 icode = CODE_FOR_avx2_gathersiv4df;
29655 case IX86_BUILTIN_GATHERALTDIV8SF:
29656 icode = CODE_FOR_avx2_gatherdiv8sf;
29658 case IX86_BUILTIN_GATHERALTSIV4DI:
29659 icode = CODE_FOR_avx2_gathersiv4di;
29661 case IX86_BUILTIN_GATHERALTDIV8SI:
29662 icode = CODE_FOR_avx2_gatherdiv8si;
29666 arg0 = CALL_EXPR_ARG (exp, 0);
29667 arg1 = CALL_EXPR_ARG (exp, 1);
29668 arg2 = CALL_EXPR_ARG (exp, 2);
29669 arg3 = CALL_EXPR_ARG (exp, 3);
29670 arg4 = CALL_EXPR_ARG (exp, 4);
29671 op0 = expand_normal (arg0);
29672 op1 = expand_normal (arg1);
29673 op2 = expand_normal (arg2);
29674 op3 = expand_normal (arg3);
29675 op4 = expand_normal (arg4);
29676 /* Note the arg order is different from the operand order. */
29677 mode0 = insn_data[icode].operand[1].mode;
29678 mode2 = insn_data[icode].operand[3].mode;
29679 mode3 = insn_data[icode].operand[4].mode;
29680 mode4 = insn_data[icode].operand[5].mode;
29682 if (target == NULL_RTX
29683 || GET_MODE (target) != insn_data[icode].operand[0].mode)
29684 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
29686 subtarget = target;
29688 if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
29689 || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
29691 rtx half = gen_reg_rtx (V4SImode);
29692 if (!nonimmediate_operand (op2, V8SImode))
29693 op2 = copy_to_mode_reg (V8SImode, op2);
29694 emit_insn (gen_vec_extract_lo_v8si (half, op2));
29697 else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
29698 || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
29700 rtx (*gen) (rtx, rtx);
29701 rtx half = gen_reg_rtx (mode0);
29702 if (mode0 == V4SFmode)
29703 gen = gen_vec_extract_lo_v8sf;
29705 gen = gen_vec_extract_lo_v8si;
29706 if (!nonimmediate_operand (op0, GET_MODE (op0)))
29707 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
29708 emit_insn (gen (half, op0));
29710 if (!nonimmediate_operand (op3, GET_MODE (op3)))
29711 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
29712 emit_insn (gen (half, op3));
29716 /* Force memory operand only with base register here. But we
29717 don't want to do it on memory operand for other builtin functions. */
29719 if (GET_MODE (op1) != Pmode)
29720 op1 = convert_to_mode (Pmode, op1, 1);
29721 op1 = force_reg (Pmode, op1);
29723 if (!insn_data[icode].operand[1].predicate (op0, mode0))
29724 op0 = copy_to_mode_reg (mode0, op0);
29725 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
29726 op1 = copy_to_mode_reg (Pmode, op1);
29727 if (!insn_data[icode].operand[3].predicate (op2, mode2))
29728 op2 = copy_to_mode_reg (mode2, op2);
29729 if (!insn_data[icode].operand[4].predicate (op3, mode3))
29730 op3 = copy_to_mode_reg (mode3, op3);
29731 if (!insn_data[icode].operand[5].predicate (op4, mode4))
29733 error ("the last argument must be scale 1, 2, 4 or 8");
29737 /* Optimize. If mask is known to have all high bits set,
29738 replace op0 with pc_rtx to signal that the instruction
29739 overwrites the whole destination and doesn't use its
29740 previous contents. */
29743 if (TREE_CODE (arg3) == VECTOR_CST)
29746 unsigned int negative = 0;
29747 for (elt = TREE_VECTOR_CST_ELTS (arg3);
29748 elt; elt = TREE_CHAIN (elt))
29750 tree cst = TREE_VALUE (elt);
29751 if (TREE_CODE (cst) == INTEGER_CST
29752 && tree_int_cst_sign_bit (cst))
29754 else if (TREE_CODE (cst) == REAL_CST
29755 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
29758 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
29761 else if (TREE_CODE (arg3) == SSA_NAME)
29763 /* Recognize also when mask is like:
29764 __v2df src = _mm_setzero_pd ();
29765 __v2df mask = _mm_cmpeq_pd (src, src);
29767 __v8sf src = _mm256_setzero_ps ();
29768 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
29769 as that is a cheaper way to load all ones into
29770 a register than having to load a constant from memory. */
29772 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
29773 if (is_gimple_call (def_stmt))
29775 tree fndecl = gimple_call_fndecl (def_stmt);
29777 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
29778 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
29780 case IX86_BUILTIN_CMPPD:
29781 case IX86_BUILTIN_CMPPS:
29782 case IX86_BUILTIN_CMPPD256:
29783 case IX86_BUILTIN_CMPPS256:
29784 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
29787 case IX86_BUILTIN_CMPEQPD:
29788 case IX86_BUILTIN_CMPEQPS:
29789 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
29790 && initializer_zerop (gimple_call_arg (def_stmt,
29801 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
29806 if (fcode == IX86_BUILTIN_GATHERDIV8SF
29807 || fcode == IX86_BUILTIN_GATHERDIV8SI)
29809 enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
29810 ? V4SFmode : V4SImode;
29811 if (target == NULL_RTX)
29812 target = gen_reg_rtx (tmode);
29813 if (tmode == V4SFmode)
29814 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
29816 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
29819 target = subtarget;
29827 for (i = 0, d = bdesc_special_args;
29828 i < ARRAY_SIZE (bdesc_special_args);
29830 if (d->code == fcode)
29831 return ix86_expand_special_args_builtin (d, exp, target);
29833 for (i = 0, d = bdesc_args;
29834 i < ARRAY_SIZE (bdesc_args);
29836 if (d->code == fcode)
29839 case IX86_BUILTIN_FABSQ:
29840 case IX86_BUILTIN_COPYSIGNQ:
29842 /* Emit a normal call if SSE2 isn't available. */
29843 return expand_call (exp, target, ignore);
29845 return ix86_expand_args_builtin (d, exp, target);
29848 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
29849 if (d->code == fcode)
29850 return ix86_expand_sse_comi (d, exp, target);
29852 for (i = 0, d = bdesc_pcmpestr;
29853 i < ARRAY_SIZE (bdesc_pcmpestr);
29855 if (d->code == fcode)
29856 return ix86_expand_sse_pcmpestr (d, exp, target);
29858 for (i = 0, d = bdesc_pcmpistr;
29859 i < ARRAY_SIZE (bdesc_pcmpistr);
29861 if (d->code == fcode)
29862 return ix86_expand_sse_pcmpistr (d, exp, target);
29864 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
29865 if (d->code == fcode)
29866 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
29867 (enum ix86_builtin_func_type)
29868 d->flag, d->comparison);
29870 gcc_unreachable ();
29873 /* Returns a function decl for a vectorized version of the builtin function
29874 with builtin function code FN and the result vector type TYPE_OUT, or NULL_TREE
29875 if it is not available. */
29878 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
29881 enum machine_mode in_mode, out_mode;
29883 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29885 if (TREE_CODE (type_out) != VECTOR_TYPE
29886 || TREE_CODE (type_in) != VECTOR_TYPE
29887 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
29890 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29891 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29892 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29893 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29897 case BUILT_IN_SQRT:
29898 if (out_mode == DFmode && in_mode == DFmode)
29900 if (out_n == 2 && in_n == 2)
29901 return ix86_builtins[IX86_BUILTIN_SQRTPD];
29902 else if (out_n == 4 && in_n == 4)
29903 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
29907 case BUILT_IN_SQRTF:
29908 if (out_mode == SFmode && in_mode == SFmode)
29910 if (out_n == 4 && in_n == 4)
29911 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
29912 else if (out_n == 8 && in_n == 8)
29913 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
29917 case BUILT_IN_IFLOOR:
29918 case BUILT_IN_LFLOOR:
29919 case BUILT_IN_LLFLOOR:
29920 /* The round insn does not trap on denormals. */
29921 if (flag_trapping_math || !TARGET_ROUND)
29924 if (out_mode == SImode && in_mode == DFmode)
29926 if (out_n == 4 && in_n == 2)
29927 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
29928 else if (out_n == 8 && in_n == 4)
29929 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
29933 case BUILT_IN_IFLOORF:
29934 case BUILT_IN_LFLOORF:
29935 case BUILT_IN_LLFLOORF:
29936 /* The round insn does not trap on denormals. */
29937 if (flag_trapping_math || !TARGET_ROUND)
29940 if (out_mode == SImode && in_mode == SFmode)
29942 if (out_n == 4 && in_n == 4)
29943 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
29944 else if (out_n == 8 && in_n == 8)
29945 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
29949 case BUILT_IN_ICEIL:
29950 case BUILT_IN_LCEIL:
29951 case BUILT_IN_LLCEIL:
29952 /* The round insn does not trap on denormals. */
29953 if (flag_trapping_math || !TARGET_ROUND)
29956 if (out_mode == SImode && in_mode == DFmode)
29958 if (out_n == 4 && in_n == 2)
29959 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
29960 else if (out_n == 8 && in_n == 4)
29961 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
29965 case BUILT_IN_ICEILF:
29966 case BUILT_IN_LCEILF:
29967 case BUILT_IN_LLCEILF:
29968 /* The round insn does not trap on denormals. */
29969 if (flag_trapping_math || !TARGET_ROUND)
29972 if (out_mode == SImode && in_mode == SFmode)
29974 if (out_n == 4 && in_n == 4)
29975 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
29976 else if (out_n == 8 && in_n == 8)
29977 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
29981 case BUILT_IN_IRINT:
29982 case BUILT_IN_LRINT:
29983 case BUILT_IN_LLRINT:
29984 if (out_mode == SImode && in_mode == DFmode)
29986 if (out_n == 4 && in_n == 2)
29987 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
29988 else if (out_n == 8 && in_n == 4)
29989 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
29993 case BUILT_IN_IRINTF:
29994 case BUILT_IN_LRINTF:
29995 case BUILT_IN_LLRINTF:
29996 if (out_mode == SImode && in_mode == SFmode)
29998 if (out_n == 4 && in_n == 4)
29999 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
30000 else if (out_n == 8 && in_n == 8)
30001 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
30005 case BUILT_IN_IROUND:
30006 case BUILT_IN_LROUND:
30007 case BUILT_IN_LLROUND:
30008 /* The round insn does not trap on denormals. */
30009 if (flag_trapping_math || !TARGET_ROUND)
30012 if (out_mode == SImode && in_mode == DFmode)
30014 if (out_n == 4 && in_n == 2)
30015 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
30016 else if (out_n == 8 && in_n == 4)
30017 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
30021 case BUILT_IN_IROUNDF:
30022 case BUILT_IN_LROUNDF:
30023 case BUILT_IN_LLROUNDF:
30024 /* The round insn does not trap on denormals. */
30025 if (flag_trapping_math || !TARGET_ROUND)
30028 if (out_mode == SImode && in_mode == SFmode)
30030 if (out_n == 4 && in_n == 4)
30031 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
30032 else if (out_n == 8 && in_n == 8)
30033 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
30037 case BUILT_IN_COPYSIGN:
30038 if (out_mode == DFmode && in_mode == DFmode)
30040 if (out_n == 2 && in_n == 2)
30041 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
30042 else if (out_n == 4 && in_n == 4)
30043 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
30047 case BUILT_IN_COPYSIGNF:
30048 if (out_mode == SFmode && in_mode == SFmode)
30050 if (out_n == 4 && in_n == 4)
30051 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
30052 else if (out_n == 8 && in_n == 8)
30053 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
30057 case BUILT_IN_FLOOR:
30058 /* The round insn does not trap on denormals. */
30059 if (flag_trapping_math || !TARGET_ROUND)
30062 if (out_mode == DFmode && in_mode == DFmode)
30064 if (out_n == 2 && in_n == 2)
30065 return ix86_builtins[IX86_BUILTIN_FLOORPD];
30066 else if (out_n == 4 && in_n == 4)
30067 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
30071 case BUILT_IN_FLOORF:
30072 /* The round insn does not trap on denormals. */
30073 if (flag_trapping_math || !TARGET_ROUND)
30076 if (out_mode == SFmode && in_mode == SFmode)
30078 if (out_n == 4 && in_n == 4)
30079 return ix86_builtins[IX86_BUILTIN_FLOORPS];
30080 else if (out_n == 8 && in_n == 8)
30081 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
30085 case BUILT_IN_CEIL:
30086 /* The round insn does not trap on denormals. */
30087 if (flag_trapping_math || !TARGET_ROUND)
30090 if (out_mode == DFmode && in_mode == DFmode)
30092 if (out_n == 2 && in_n == 2)
30093 return ix86_builtins[IX86_BUILTIN_CEILPD];
30094 else if (out_n == 4 && in_n == 4)
30095 return ix86_builtins[IX86_BUILTIN_CEILPD256];
30099 case BUILT_IN_CEILF:
30100 /* The round insn does not trap on denormals. */
30101 if (flag_trapping_math || !TARGET_ROUND)
30104 if (out_mode == SFmode && in_mode == SFmode)
30106 if (out_n == 4 && in_n == 4)
30107 return ix86_builtins[IX86_BUILTIN_CEILPS];
30108 else if (out_n == 8 && in_n == 8)
30109 return ix86_builtins[IX86_BUILTIN_CEILPS256];
30113 case BUILT_IN_TRUNC:
30114 /* The round insn does not trap on denormals. */
30115 if (flag_trapping_math || !TARGET_ROUND)
30118 if (out_mode == DFmode && in_mode == DFmode)
30120 if (out_n == 2 && in_n == 2)
30121 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
30122 else if (out_n == 4 && in_n == 4)
30123 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
30127 case BUILT_IN_TRUNCF:
30128 /* The round insn does not trap on denormals. */
30129 if (flag_trapping_math || !TARGET_ROUND)
30132 if (out_mode == SFmode && in_mode == SFmode)
30134 if (out_n == 4 && in_n == 4)
30135 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
30136 else if (out_n == 8 && in_n == 8)
30137 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
30141 case BUILT_IN_RINT:
30142 /* The round insn does not trap on denormals. */
30143 if (flag_trapping_math || !TARGET_ROUND)
30146 if (out_mode == DFmode && in_mode == DFmode)
30148 if (out_n == 2 && in_n == 2)
30149 return ix86_builtins[IX86_BUILTIN_RINTPD];
30150 else if (out_n == 4 && in_n == 4)
30151 return ix86_builtins[IX86_BUILTIN_RINTPD256];
30155 case BUILT_IN_RINTF:
30156 /* The round insn does not trap on denormals. */
30157 if (flag_trapping_math || !TARGET_ROUND)
30160 if (out_mode == SFmode && in_mode == SFmode)
30162 if (out_n == 4 && in_n == 4)
30163 return ix86_builtins[IX86_BUILTIN_RINTPS];
30164 else if (out_n == 8 && in_n == 8)
30165 return ix86_builtins[IX86_BUILTIN_RINTPS256];
30169 case BUILT_IN_ROUND:
30170 /* The round insn does not trap on denormals. */
30171 if (flag_trapping_math || !TARGET_ROUND)
30174 if (out_mode == DFmode && in_mode == DFmode)
30176 if (out_n == 2 && in_n == 2)
30177 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
30178 else if (out_n == 4 && in_n == 4)
30179 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
30183 case BUILT_IN_ROUNDF:
30184 /* The round insn does not trap on denormals. */
30185 if (flag_trapping_math || !TARGET_ROUND)
30188 if (out_mode == SFmode && in_mode == SFmode)
30190 if (out_n == 4 && in_n == 4)
30191 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
30192 else if (out_n == 8 && in_n == 8)
30193 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
30196 case BUILT_IN_FMA:
30198 if (out_mode == DFmode && in_mode == DFmode)
30200 if (out_n == 2 && in_n == 2)
30201 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
30202 if (out_n == 4 && in_n == 4)
30203 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
30207 case BUILT_IN_FMAF:
30208 if (out_mode == SFmode && in_mode == SFmode)
30210 if (out_n == 4 && in_n == 4)
30211 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
30212 if (out_n == 8 && in_n == 8)
30213 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
30221 /* Dispatch to a handler for a vectorization library. */
30222 if (ix86_veclib_handler)
30223 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
30229 /* Handler for an SVML-style interface to
30230 a library with vectorized intrinsics. */
30233 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
30236 tree fntype, new_fndecl, args;
30239 enum machine_mode el_mode, in_mode;
30242 /* The SVML is suitable for unsafe math only. */
30243 if (!flag_unsafe_math_optimizations)
30246 el_mode = TYPE_MODE (TREE_TYPE (type_out));
30247 n = TYPE_VECTOR_SUBPARTS (type_out);
30248 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30249 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30250 if (el_mode != in_mode
30258 case BUILT_IN_LOG10:
30260 case BUILT_IN_TANH:
30262 case BUILT_IN_ATAN:
30263 case BUILT_IN_ATAN2:
30264 case BUILT_IN_ATANH:
30265 case BUILT_IN_CBRT:
30266 case BUILT_IN_SINH:
30268 case BUILT_IN_ASINH:
30269 case BUILT_IN_ASIN:
30270 case BUILT_IN_COSH:
30272 case BUILT_IN_ACOSH:
30273 case BUILT_IN_ACOS:
30274 if (el_mode != DFmode || n != 2)
30278 case BUILT_IN_EXPF:
30279 case BUILT_IN_LOGF:
30280 case BUILT_IN_LOG10F:
30281 case BUILT_IN_POWF:
30282 case BUILT_IN_TANHF:
30283 case BUILT_IN_TANF:
30284 case BUILT_IN_ATANF:
30285 case BUILT_IN_ATAN2F:
30286 case BUILT_IN_ATANHF:
30287 case BUILT_IN_CBRTF:
30288 case BUILT_IN_SINHF:
30289 case BUILT_IN_SINF:
30290 case BUILT_IN_ASINHF:
30291 case BUILT_IN_ASINF:
30292 case BUILT_IN_COSHF:
30293 case BUILT_IN_COSF:
30294 case BUILT_IN_ACOSHF:
30295 case BUILT_IN_ACOSF:
30296 if (el_mode != SFmode || n != 4)
30304 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
30306 if (fn == BUILT_IN_LOGF)
30307 strcpy (name, "vmlsLn4");
30308 else if (fn == BUILT_IN_LOG)
30309 strcpy (name, "vmldLn2");
30312 sprintf (name, "vmls%s", bname + 10);
30313 name[strlen (name) - 1] = '4';
30316 sprintf (name, "vmld%s2", bname + 10);
30318 /* Convert to uppercase. */
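/* E.g. (illustrative): BUILT_IN_SINF becomes "vmlsSin4" and
   BUILT_IN_SIN becomes "vmldSin2" after the case adjustment. */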
30322 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
30324 args = TREE_CHAIN (args))
30328 fntype = build_function_type_list (type_out, type_in, NULL);
30330 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
30332 /* Build a function declaration for the vectorized function. */
30333 new_fndecl = build_decl (BUILTINS_LOCATION,
30334 FUNCTION_DECL, get_identifier (name), fntype);
30335 TREE_PUBLIC (new_fndecl) = 1;
30336 DECL_EXTERNAL (new_fndecl) = 1;
30337 DECL_IS_NOVOPS (new_fndecl) = 1;
30338 TREE_READONLY (new_fndecl) = 1;
30343 /* Handler for an ACML-style interface to
30344 a library with vectorized intrinsics. */
30347 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
30349 char name[20] = "__vr.._";
30350 tree fntype, new_fndecl, args;
30353 enum machine_mode el_mode, in_mode;
30356 /* The ACML is 64-bit only and suitable for unsafe math only, as
30357 it does not correctly support parts of IEEE with the required
30358 precision such as denormals. */
30360 || !flag_unsafe_math_optimizations)
30363 el_mode = TYPE_MODE (TREE_TYPE (type_out));
30364 n = TYPE_VECTOR_SUBPARTS (type_out);
30365 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30366 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30367 if (el_mode != in_mode
30377 case BUILT_IN_LOG2:
30378 case BUILT_IN_LOG10:
30381 if (el_mode != DFmode
30386 case BUILT_IN_SINF:
30387 case BUILT_IN_COSF:
30388 case BUILT_IN_EXPF:
30389 case BUILT_IN_POWF:
30390 case BUILT_IN_LOGF:
30391 case BUILT_IN_LOG2F:
30392 case BUILT_IN_LOG10F:
30395 if (el_mode != SFmode
30404 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
30405 sprintf (name + 7, "%s", bname + 10);
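/* E.g. (illustrative): BUILT_IN_SIN yields "__vrd2_sin" and
   BUILT_IN_SINF yields "__vrs4_sinf". */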
30408 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
30410 args = TREE_CHAIN (args))
30414 fntype = build_function_type_list (type_out, type_in, NULL);
30416 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
30418 /* Build a function declaration for the vectorized function. */
30419 new_fndecl = build_decl (BUILTINS_LOCATION,
30420 FUNCTION_DECL, get_identifier (name), fntype);
30421 TREE_PUBLIC (new_fndecl) = 1;
30422 DECL_EXTERNAL (new_fndecl) = 1;
30423 DECL_IS_NOVOPS (new_fndecl) = 1;
30424 TREE_READONLY (new_fndecl) = 1;
30429 /* Returns a decl of a function that implements gather load with
30430 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
30431 Return NULL_TREE if it is not available. */
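/* E.g. (illustrative): a V2DF gather load with SImode indices maps
   to IX86_BUILTIN_GATHERSIV2DF below, i.e. the vgatherdpd
   pattern. */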
30434 ix86_vectorize_builtin_gather (const_tree mem_vectype,
30435 const_tree index_type, int scale)
30438 enum ix86_builtins code;
30443 if ((TREE_CODE (index_type) != INTEGER_TYPE
30444 && !POINTER_TYPE_P (index_type))
30445 || (TYPE_MODE (index_type) != SImode
30446 && TYPE_MODE (index_type) != DImode))
30449 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
30452 /* The v*gather* insns sign-extend the index to pointer mode. */
30453 if (TYPE_PRECISION (index_type) < POINTER_SIZE
30454 && TYPE_UNSIGNED (index_type))
30459 || (scale & (scale - 1)) != 0)
30462 si = TYPE_MODE (index_type) == SImode;
30463 switch (TYPE_MODE (mem_vectype))
30466 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
30469 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
30472 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
30475 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
30478 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
30481 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
30484 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
30487 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
30493 return ix86_builtins[code];
30496 /* Returns a decl of a target-specific builtin that implements the
30497 reciprocal of the function, or NULL_TREE if not available. */
30500 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
30501 bool sqrt ATTRIBUTE_UNUSED)
30503 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
30504 && flag_finite_math_only && !flag_trapping_math
30505 && flag_unsafe_math_optimizations))
30509 /* Machine dependent builtins. */
30512 /* Vectorized version of sqrt to rsqrt conversion. */
30513 case IX86_BUILTIN_SQRTPS_NR:
30514 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
30516 case IX86_BUILTIN_SQRTPS_NR256:
30517 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
30523 /* Normal builtins. */
30526 /* Sqrt to rsqrt conversion. */
30527 case BUILT_IN_SQRTF:
30528 return ix86_builtins[IX86_BUILTIN_RSQRTF];
30535 /* Helper for avx_vpermilps256_operand et al. This is also used by
30536 the expansion functions to turn the parallel back into a mask.
30537 The return value is 0 for no match and the imm8+1 for a match. */
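/* E.g. (illustrative): for V4SF, a parallel selecting (1 0 3 2)
   encodes the imm8 0xb1, so the return value is 0xb2. */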
30540 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
30542 unsigned i, nelt = GET_MODE_NUNITS (mode);
30544 unsigned char ipar[8];
30546 if (XVECLEN (par, 0) != (int) nelt)
30549 /* Validate that all of the elements are constants, and not totally
30550 out of range. Copy the data into an integral array to make the
30551 subsequent checks easier. */
30552 for (i = 0; i < nelt; ++i)
30554 rtx er = XVECEXP (par, 0, i);
30555 unsigned HOST_WIDE_INT ei;
30557 if (!CONST_INT_P (er))
30568 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
30570 for (i = 0; i < 2; ++i)
30574 mask |= ipar[i] << i;
30576 for (i = 2; i < 4; ++i)
30580 mask |= (ipar[i] - 2) << i;
30585 /* In the 256-bit SFmode case, we have full freedom of movement
30586 within the low 128-bit lane, but the high 128-bit lane must
30587 mirror the exact same pattern. */
30588 for (i = 0; i < 4; ++i)
30589 if (ipar[i] + 4 != ipar[i + 4])
/* In the 128-bit case, we have full freedom in the placement of
   the elements from the source operand.  */
30598 for (i = 0; i < nelt; ++i)
30599 mask |= ipar[i] << (i * (nelt / 2));
30603 gcc_unreachable ();
30606 /* Make sure success has a non-zero value by adding one. */
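/* Worked example for the V4SFmode case: the parallel (1 0 3 2) packs
   two bits per element, mask = 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1, and
   the function returns 0xb2; the caller subtracts one to recover the
   vpermilps imm8 0xb1.  */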
30610 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
30611 the expansion functions to turn the parallel back into a mask.
30612 The return value is 0 for no match and the imm8+1 for a match. */
30615 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
30617 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
30619 unsigned char ipar[8];
30621 if (XVECLEN (par, 0) != (int) nelt)
30624 /* Validate that all of the elements are constants, and not totally
30625 out of range. Copy the data into an integral array to make the
30626 subsequent checks easier. */
30627 for (i = 0; i < nelt; ++i)
30629 rtx er = XVECEXP (par, 0, i);
30630 unsigned HOST_WIDE_INT ei;
30632 if (!CONST_INT_P (er))
30635 if (ei >= 2 * nelt)
/* Validate that each half of the permute is a run of consecutive
   elements, i.e. a genuine half of one operand.  */
30641 for (i = 0; i < nelt2 - 1; ++i)
30642 if (ipar[i] + 1 != ipar[i + 1])
30644 for (i = nelt2; i < nelt - 1; ++i)
30645 if (ipar[i] + 1 != ipar[i + 1])
30648 /* Reconstruct the mask. */
30649 for (i = 0; i < 2; ++i)
30651 unsigned e = ipar[i * nelt2];
30655 mask |= e << (i * 4);
30658 /* Make sure success has a non-zero value by adding one. */
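/* Worked example for the V4DFmode case: taking the high half of
   operand 0 and the low half of operand 1 gives the parallel
   (2 3 4 5).  The halves start at elements 2 and 4, i.e. half
   numbers 1 and 2, so mask = 1 | 2 << 4 = 0x21 and the function
   returns 0x22 -- 0x21 being the familiar vperm2f128 immediate.  */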
30662 /* Store OPERAND to the memory after reload is completed. This means
30663 that we can't easily use assign_stack_local. */
30665 ix86_force_to_memory (enum machine_mode mode, rtx operand)
30669 gcc_assert (reload_completed);
30670 if (ix86_using_red_zone ())
30672 result = gen_rtx_MEM (mode,
30673 gen_rtx_PLUS (Pmode,
30675 GEN_INT (-RED_ZONE_SIZE)));
30676 emit_move_insn (result, operand);
30678 else if (TARGET_64BIT)
30684 operand = gen_lowpart (DImode, operand);
30688 gen_rtx_SET (VOIDmode,
30689 gen_rtx_MEM (DImode,
30690 gen_rtx_PRE_DEC (DImode,
30691 stack_pointer_rtx)),
30695 gcc_unreachable ();
30697 result = gen_rtx_MEM (mode, stack_pointer_rtx);
30706 split_double_mode (mode, &operand, 1, operands, operands + 1);
30708 gen_rtx_SET (VOIDmode,
30709 gen_rtx_MEM (SImode,
30710 gen_rtx_PRE_DEC (Pmode,
30711 stack_pointer_rtx)),
30714 gen_rtx_SET (VOIDmode,
30715 gen_rtx_MEM (SImode,
30716 gen_rtx_PRE_DEC (Pmode,
30717 stack_pointer_rtx)),
30722 /* Store HImodes as SImodes. */
30723 operand = gen_lowpart (SImode, operand);
30727 gen_rtx_SET (VOIDmode,
30728 gen_rtx_MEM (GET_MODE (operand),
30729 gen_rtx_PRE_DEC (SImode,
30730 stack_pointer_rtx)),
30734 gcc_unreachable ();
30736 result = gen_rtx_MEM (mode, stack_pointer_rtx);
30741 /* Free operand from the memory. */
30743 ix86_free_from_memory (enum machine_mode mode)
30745 if (!ix86_using_red_zone ())
30749 if (mode == DImode || TARGET_64BIT)
/* Use LEA to deallocate stack space.  In peephole2 it will be converted
   to a pop or add instruction if registers are available.  */
30755 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
30756 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
30761 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
30763 Put float CONST_DOUBLE in the constant pool instead of fp regs.
30764 QImode must go into class Q_REGS.
30765 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
30766 movdf to do mem-to-mem moves through integer regs. */
30769 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
30771 enum machine_mode mode = GET_MODE (x);
30773 /* We're only allowed to return a subclass of CLASS. Many of the
30774 following checks fail for NO_REGS, so eliminate that early. */
30775 if (regclass == NO_REGS)
30778 /* All classes can load zeros. */
30779 if (x == CONST0_RTX (mode))
30782 /* Force constants into memory if we are loading a (nonzero) constant into
30783 an MMX or SSE register. This is because there are no MMX/SSE instructions
30784 to load from a constant. */
30786 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
30789 /* Prefer SSE regs only, if we can use them for math. */
30790 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
30791 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
30793 /* Floating-point constants need more complex checks. */
30794 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
30796 /* General regs can load everything. */
30797 if (reg_class_subset_p (regclass, GENERAL_REGS))
30800 /* Floats can load 0 and 1 plus some others. Note that we eliminated
30801 zero above. We only want to wind up preferring 80387 registers if
30802 we plan on doing computation with them. */
30804 && standard_80387_constant_p (x) > 0)
30806 /* Limit class to non-sse. */
30807 if (regclass == FLOAT_SSE_REGS)
30809 if (regclass == FP_TOP_SSE_REGS)
30811 if (regclass == FP_SECOND_SSE_REGS)
30812 return FP_SECOND_REG;
30813 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
/* Generally when we see PLUS here, it's the function invariant
   (plus soft-fp const_int), which can only be computed into general
   registers.  */
30823 if (GET_CODE (x) == PLUS)
30824 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
30826 /* QImode constants are easy to load, but non-constant QImode data
30827 must go into Q_REGS. */
30828 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
30830 if (reg_class_subset_p (regclass, Q_REGS))
30832 if (reg_class_subset_p (Q_REGS, regclass))
30840 /* Discourage putting floating-point values in SSE registers unless
30841 SSE math is being used, and likewise for the 387 registers. */
30843 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
30845 enum machine_mode mode = GET_MODE (x);
/* Restrict the output reload class to the register bank that we are doing
   math on.  If we would prefer not to return a subset of CLASS, reject
   this alternative: if reload cannot do this, it will still use its
   choice.  */
30851 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
30852 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
30854 if (X87_FLOAT_MODE_P (mode))
30856 if (regclass == FP_TOP_SSE_REGS)
30858 else if (regclass == FP_SECOND_SSE_REGS)
30859 return FP_SECOND_REG;
30861 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
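/* E.g. with -mfpmath=sse an SFmode value is only reloaded into
   SSE_REGS here; preferring FLOAT_REGS would bounce it through the
   x87 stack for no benefit.  */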
30868 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
30869 enum machine_mode mode, secondary_reload_info *sri)
30871 /* Double-word spills from general registers to non-offsettable memory
30872 references (zero-extended addresses) require special handling. */
30875 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
30876 && rclass == GENERAL_REGS
30877 && !offsettable_memref_p (x))
30880 ? CODE_FOR_reload_noff_load
30881 : CODE_FOR_reload_noff_store);
30882 /* Add the cost of moving address to a temporary. */
30883 sri->extra_cost = 1;
30888 /* QImode spills from non-QI registers require
30889 intermediate register on 32bit targets. */
30891 && !in_p && mode == QImode
30892 && (rclass == GENERAL_REGS
30893 || rclass == LEGACY_REGS
30894 || rclass == INDEX_REGS))
30903 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
30904 regno = true_regnum (x);
30906 /* Return Q_REGS if the operand is in memory. */
30911 /* This condition handles corner case where an expression involving
30912 pointers gets vectorized. We're trying to use the address of a
30913 stack slot as a vector initializer.
30915 (set (reg:V2DI 74 [ vect_cst_.2 ])
30916 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
30918 Eventually frame gets turned into sp+offset like this:
30920 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30921 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
30922 (const_int 392 [0x188]))))
30924 That later gets turned into:
30926 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30927 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
30928 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
30930 We'll have the following reload recorded:
30932 Reload 0: reload_in (DI) =
30933 (plus:DI (reg/f:DI 7 sp)
30934 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
30935 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30936 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
30937 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
30938 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30939 reload_reg_rtx: (reg:V2DI 22 xmm1)
30941 Which isn't going to work since SSE instructions can't handle scalar
30942 additions. Returning GENERAL_REGS forces the addition into integer
30943 register and reload can handle subsequent reloads without problems. */
30945 if (in_p && GET_CODE (x) == PLUS
30946 && SSE_CLASS_P (rclass)
30947 && SCALAR_INT_MODE_P (mode))
30948 return GENERAL_REGS;
30953 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
30956 ix86_class_likely_spilled_p (reg_class_t rclass)
30967 case SSE_FIRST_REG:
30969 case FP_SECOND_REG:
30979 /* If we are copying between general and FP registers, we need a memory
30980 location. The same is true for SSE and MMX registers.
30982 To optimize register_move_cost performance, allow inline variant.
The macro can't work reliably when one of the CLASSES is a class
containing registers from multiple units (SSE, MMX, integer).  We avoid
this by never combining those units in a single alternative in the
machine description.  Ensure that this constraint holds to avoid
unexpected surprises.
30989 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
30990 enforce these sanity checks. */
30993 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
30994 enum machine_mode mode, int strict)
30996 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
30997 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
30998 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
30999 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
31000 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
31001 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
31003 gcc_assert (!strict);
31007 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
31010 /* ??? This is a lie. We do have moves between mmx/general, and for
31011 mmx/sse2. But by saying we need secondary memory we discourage the
31012 register allocator from using the mmx registers unless needed. */
31013 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
31016 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
31018 /* SSE1 doesn't have any direct moves from other classes. */
31022 /* If the target says that inter-unit moves are more expensive
31023 than moving through memory, then don't generate them. */
31024 if (!TARGET_INTER_UNIT_MOVES)
31027 /* Between SSE and general, we have moves no larger than word size. */
31028 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
31036 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
31037 enum machine_mode mode, int strict)
31039 return inline_secondary_memory_needed (class1, class2, mode, strict);
31042 /* Implement the TARGET_CLASS_MAX_NREGS hook.
31044 On the 80386, this is the size of MODE in words,
31045 except in the FP regs, where a single reg is always enough. */
31047 static unsigned char
31048 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
31050 if (MAYBE_INTEGER_CLASS_P (rclass))
31052 if (mode == XFmode)
31053 return (TARGET_64BIT ? 2 : 3);
31054 else if (mode == XCmode)
31055 return (TARGET_64BIT ? 4 : 6);
31057 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
31061 if (COMPLEX_MODE_P (mode))
31068 /* Return true if the registers in CLASS cannot represent the change from
31069 modes FROM to TO. */
31072 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
31073 enum reg_class regclass)
31078 /* x87 registers can't do subreg at all, as all values are reformatted
31079 to extended precision. */
31080 if (MAYBE_FLOAT_CLASS_P (regclass))
31083 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
31085 /* Vector registers do not support QI or HImode loads. If we don't
31086 disallow a change to these modes, reload will assume it's ok to
31087 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
31088 the vec_dupv4hi pattern. */
31089 if (GET_MODE_SIZE (from) < 4)
31092 /* Vector registers do not support subreg with nonzero offsets, which
31093 are otherwise valid for integer registers. Since we can't see
31094 whether we have a nonzero offset from here, prohibit all
31095 nonparadoxical subregs changing size. */
31096 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
31103 /* Return the cost of moving data of mode M between a
31104 register and memory. A value of 2 is the default; this cost is
31105 relative to those in `REGISTER_MOVE_COST'.
31107 This function is used extensively by register_move_cost that is used to
31108 build tables at startup. Make it inline in this case.
When IN is 2, return the maximum of the in and out move costs.

If moving between registers and memory is more expensive than
between two registers, you should define this macro to express the
relative cost.

Model also increased moving costs of QImode registers in non-Q_REGS
classes.  */
31119 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
31123 if (FLOAT_CLASS_P (regclass))
31141 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
31142 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
31144 if (SSE_CLASS_P (regclass))
31147 switch (GET_MODE_SIZE (mode))
31162 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
31163 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
31165 if (MMX_CLASS_P (regclass))
31168 switch (GET_MODE_SIZE (mode))
31180 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
31181 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
31183 switch (GET_MODE_SIZE (mode))
31186 if (Q_CLASS_P (regclass) || TARGET_64BIT)
31189 return ix86_cost->int_store[0];
31190 if (TARGET_PARTIAL_REG_DEPENDENCY
31191 && optimize_function_for_speed_p (cfun))
31192 cost = ix86_cost->movzbl_load;
31194 cost = ix86_cost->int_load[0];
31196 return MAX (cost, ix86_cost->int_store[0]);
31202 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
31204 return ix86_cost->movzbl_load;
31206 return ix86_cost->int_store[0] + 4;
31211 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
31212 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
31214 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
31215 if (mode == TFmode)
31218 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
31220 cost = ix86_cost->int_load[2];
31222 cost = ix86_cost->int_store[2];
31223 return (cost * (((int) GET_MODE_SIZE (mode)
31224 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
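/* Worked example: TFmode is moved as XFmode, which is 12 bytes on
   ia32 with the default long double layout, so the cost above is the
   32 bit move cost scaled by (12 + 3) / 4 = 3 word-sized moves.  */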
31229 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
31232 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when CLASS1 is the same
   as CLASS2; on some machines it is expensive to move between registers
   if they are not general registers.  */
31244 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
31245 reg_class_t class2_i)
31247 enum reg_class class1 = (enum reg_class) class1_i;
31248 enum reg_class class2 = (enum reg_class) class2_i;
31250 /* In case we require secondary memory, compute cost of the store followed
31251 by load. In order to avoid bad register allocation choices, we need
31252 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
31254 if (inline_secondary_memory_needed (class1, class2, mode, 0))
31258 cost += inline_memory_move_cost (mode, class1, 2);
31259 cost += inline_memory_move_cost (mode, class2, 2);
/* In case of copying from a general purpose register we may emit multiple
   stores followed by a single load, causing a memory size mismatch stall.
   Count this as an arbitrarily high cost of 20.  */
31264 if (targetm.class_max_nregs (class1, mode)
31265 > targetm.class_max_nregs (class2, mode))
31268 /* In the case of FP/MMX moves, the registers actually overlap, and we
31269 have to switch modes in order to treat them differently. */
31270 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
31271 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
31277 /* Moves between SSE/MMX and integer unit are expensive. */
31278 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
31279 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
/* ??? By keeping the returned value relatively high, we limit the number
   of moves between integer and MMX/SSE registers for all targets.
   Additionally, a high value prevents problems with x86_modes_tieable_p,
   where integer modes in MMX/SSE registers are not tieable
   because of missing QImode and HImode moves to, from or between
   MMX/SSE registers.  */
31287 return MAX (8, ix86_cost->mmxsse_to_integer);
31289 if (MAYBE_FLOAT_CLASS_P (class1))
31290 return ix86_cost->fp_move;
31291 if (MAYBE_SSE_CLASS_P (class1))
31292 return ix86_cost->sse_move;
31293 if (MAYBE_MMX_CLASS_P (class1))
31294 return ix86_cost->mmx_move;
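/* Worked example: a DImode copy between SSE and integer registers on
   ia32 needs secondary memory, so its cost is the sum of the store
   and load costs computed above rather than a single move cost.  */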
31298 /* Return TRUE if hard register REGNO can hold a value of machine-mode
31302 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
/* The flags register can hold only CCmode values, and only the flags
   register can hold them.  */
31305 if (CC_REGNO_P (regno))
31306 return GET_MODE_CLASS (mode) == MODE_CC;
31307 if (GET_MODE_CLASS (mode) == MODE_CC
31308 || GET_MODE_CLASS (mode) == MODE_RANDOM
31309 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
31311 if (FP_REGNO_P (regno))
31312 return VALID_FP_MODE_P (mode);
31313 if (SSE_REGNO_P (regno))
/* We implement the move patterns for all vector modes into and
   out of SSE registers, even when no operation instructions
   are available.  OImode move is available only when AVX is
   enabled.  */
31319 return ((TARGET_AVX && mode == OImode)
31320 || VALID_AVX256_REG_MODE (mode)
31321 || VALID_SSE_REG_MODE (mode)
31322 || VALID_SSE2_REG_MODE (mode)
31323 || VALID_MMX_REG_MODE (mode)
31324 || VALID_MMX_REG_MODE_3DNOW (mode));
31326 if (MMX_REGNO_P (regno))
31328 /* We implement the move patterns for 3DNOW modes even in MMX mode,
31329 so if the register is available at all, then we can move data of
31330 the given mode into or out of it. */
31331 return (VALID_MMX_REG_MODE (mode)
31332 || VALID_MMX_REG_MODE_3DNOW (mode));
31335 if (mode == QImode)
/* Take care with QImode values -- they can be in non-QI regs,
   but then they do cause partial register stalls.  */
31339 if (regno <= BX_REG || TARGET_64BIT)
31341 if (!TARGET_PARTIAL_REG_STALL)
31343 return !can_create_pseudo_p ();
/* We handle both integers and floats in the general purpose registers.  */
31346 else if (VALID_INT_MODE_P (mode))
31348 else if (VALID_FP_MODE_P (mode))
31350 else if (VALID_DFP_MODE_P (mode))
31352 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
31353 on to use that value in smaller contexts, this can easily force a
31354 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
31355 supporting DImode, allow it. */
31356 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
31362 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
31363 tieable integer mode. */
31366 ix86_tieable_integer_mode_p (enum machine_mode mode)
31375 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
31378 return TARGET_64BIT;
31385 /* Return true if MODE1 is accessible in a register that can hold MODE2
31386 without copying. That is, all register classes that can hold MODE2
31387 can also hold MODE1. */
31390 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
31392 if (mode1 == mode2)
31395 if (ix86_tieable_integer_mode_p (mode1)
31396 && ix86_tieable_integer_mode_p (mode2))
31399 /* MODE2 being XFmode implies fp stack or general regs, which means we
31400 can tie any smaller floating point modes to it. Note that we do not
31401 tie this with TFmode. */
31402 if (mode2 == XFmode)
31403 return mode1 == SFmode || mode1 == DFmode;
31405 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
31406 that we can tie it with SFmode. */
31407 if (mode2 == DFmode)
31408 return mode1 == SFmode;
31410 /* If MODE2 is only appropriate for an SSE register, then tie with
31411 any other mode acceptable to SSE registers. */
31412 if (GET_MODE_SIZE (mode2) == 16
31413 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
31414 return (GET_MODE_SIZE (mode1) == 16
31415 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
31417 /* If MODE2 is appropriate for an MMX register, then tie
31418 with any other mode acceptable to MMX registers. */
31419 if (GET_MODE_SIZE (mode2) == 8
31420 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
31421 return (GET_MODE_SIZE (mode1) == 8
31422 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
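/* E.g. V4SFmode and V2DFmode tie with each other (both are 16-byte
   SSE modes), and SFmode ties with DFmode; but TFmode ties with
   neither XFmode nor DFmode, per the checks above.  */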
31427 /* Compute a (partial) cost for rtx X. Return true if the complete
31428 cost has been computed, and false if subexpressions should be
31429 scanned. In either case, *TOTAL contains the cost result. */
31432 ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
31435 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
31436 enum machine_mode mode = GET_MODE (x);
31437 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
31445 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
31447 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
31449 else if (flag_pic && SYMBOLIC_CONST (x)
|| (GET_CODE (x) != LABEL_REF
31452 && (GET_CODE (x) != SYMBOL_REF
31453 || !SYMBOL_REF_LOCAL_P (x)))))
31460 if (mode == VOIDmode)
31463 switch (standard_80387_constant_p (x))
31468 default: /* Other constants */
31473 /* Start with (MEM (SYMBOL_REF)), since that's where
31474 it'll probably end up. Add a penalty for size. */
31475 *total = (COSTS_N_INSNS (1)
31476 + (flag_pic != 0 && !TARGET_64BIT)
31477 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
/* Zero extension is often completely free on x86_64, so make
   it as cheap as possible.  */
31485 if (TARGET_64BIT && mode == DImode
31486 && GET_MODE (XEXP (x, 0)) == SImode)
31488 else if (TARGET_ZERO_EXTEND_WITH_AND)
31489 *total = cost->add;
31491 *total = cost->movzx;
31495 *total = cost->movsx;
31499 if (CONST_INT_P (XEXP (x, 1))
31500 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
31502 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
31505 *total = cost->add;
31508 if ((value == 2 || value == 3)
31509 && cost->lea <= cost->shift_const)
31511 *total = cost->lea;
31521 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
31523 if (CONST_INT_P (XEXP (x, 1)))
31525 if (INTVAL (XEXP (x, 1)) > 32)
31526 *total = cost->shift_const + COSTS_N_INSNS (2);
31528 *total = cost->shift_const * 2;
31532 if (GET_CODE (XEXP (x, 1)) == AND)
31533 *total = cost->shift_var * 2;
31535 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
31540 if (CONST_INT_P (XEXP (x, 1)))
31541 *total = cost->shift_const;
31543 *total = cost->shift_var;
31551 gcc_assert (FLOAT_MODE_P (mode));
31552 gcc_assert (TARGET_FMA || TARGET_FMA4);
31554 /* ??? SSE scalar/vector cost should be used here. */
31555 /* ??? Bald assumption that fma has the same cost as fmul. */
31556 *total = cost->fmul;
31557 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
31559 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
31561 if (GET_CODE (sub) == NEG)
31562 sub = XEXP (sub, 0);
31563 *total += rtx_cost (sub, FMA, 0, speed);
31566 if (GET_CODE (sub) == NEG)
31567 sub = XEXP (sub, 0);
31568 *total += rtx_cost (sub, FMA, 2, speed);
31573 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31575 /* ??? SSE scalar cost should be used here. */
31576 *total = cost->fmul;
31579 else if (X87_FLOAT_MODE_P (mode))
31581 *total = cost->fmul;
31584 else if (FLOAT_MODE_P (mode))
31586 /* ??? SSE vector cost should be used here. */
31587 *total = cost->fmul;
31592 rtx op0 = XEXP (x, 0);
31593 rtx op1 = XEXP (x, 1);
31595 if (CONST_INT_P (XEXP (x, 1)))
31597 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
31598 for (nbits = 0; value != 0; value &= value - 1)
31602 /* This is arbitrary. */
31605 /* Compute costs correctly for widening multiplication. */
31606 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
31607 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
31608 == GET_MODE_SIZE (mode))
31610 int is_mulwiden = 0;
31611 enum machine_mode inner_mode = GET_MODE (op0);
31613 if (GET_CODE (op0) == GET_CODE (op1))
31614 is_mulwiden = 1, op1 = XEXP (op1, 0);
31615 else if (CONST_INT_P (op1))
31617 if (GET_CODE (op0) == SIGN_EXTEND)
31618 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
31621 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
31625 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
31628 *total = (cost->mult_init[MODE_INDEX (mode)]
31629 + nbits * cost->mult_bit
31630 + rtx_cost (op0, outer_code, opno, speed)
31631 + rtx_cost (op1, outer_code, opno, speed));
31640 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31641 /* ??? SSE cost should be used here. */
31642 *total = cost->fdiv;
31643 else if (X87_FLOAT_MODE_P (mode))
31644 *total = cost->fdiv;
31645 else if (FLOAT_MODE_P (mode))
31646 /* ??? SSE vector cost should be used here. */
31647 *total = cost->fdiv;
31649 *total = cost->divide[MODE_INDEX (mode)];
31653 if (GET_MODE_CLASS (mode) == MODE_INT
31654 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
31656 if (GET_CODE (XEXP (x, 0)) == PLUS
31657 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
31658 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
31659 && CONSTANT_P (XEXP (x, 1)))
31661 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
31662 if (val == 2 || val == 4 || val == 8)
31664 *total = cost->lea;
31665 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
31666 outer_code, opno, speed);
31667 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
31668 outer_code, opno, speed);
31669 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31673 else if (GET_CODE (XEXP (x, 0)) == MULT
31674 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
31676 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
31677 if (val == 2 || val == 4 || val == 8)
31679 *total = cost->lea;
31680 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
31681 outer_code, opno, speed);
31682 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31686 else if (GET_CODE (XEXP (x, 0)) == PLUS)
31688 *total = cost->lea;
31689 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
31690 outer_code, opno, speed);
31691 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
31692 outer_code, opno, speed);
31693 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31700 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31702 /* ??? SSE cost should be used here. */
31703 *total = cost->fadd;
31706 else if (X87_FLOAT_MODE_P (mode))
31708 *total = cost->fadd;
31711 else if (FLOAT_MODE_P (mode))
31713 /* ??? SSE vector cost should be used here. */
31714 *total = cost->fadd;
31722 if (!TARGET_64BIT && mode == DImode)
31724 *total = (cost->add * 2
31725 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
31726 << (GET_MODE (XEXP (x, 0)) != DImode))
31727 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
31728 << (GET_MODE (XEXP (x, 1)) != DImode)));
31734 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31736 /* ??? SSE cost should be used here. */
31737 *total = cost->fchs;
31740 else if (X87_FLOAT_MODE_P (mode))
31742 *total = cost->fchs;
31745 else if (FLOAT_MODE_P (mode))
31747 /* ??? SSE vector cost should be used here. */
31748 *total = cost->fchs;
31754 if (!TARGET_64BIT && mode == DImode)
31755 *total = cost->add * 2;
31757 *total = cost->add;
31761 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
31762 && XEXP (XEXP (x, 0), 1) == const1_rtx
31763 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
31764 && XEXP (x, 1) == const0_rtx)
31766 /* This kind of construct is implemented using test[bwl].
31767 Treat it as if we had an AND. */
31768 *total = (cost->add
31769 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
31770 + rtx_cost (const1_rtx, outer_code, opno, speed));
31776 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
31781 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31782 /* ??? SSE cost should be used here. */
31783 *total = cost->fabs;
31784 else if (X87_FLOAT_MODE_P (mode))
31785 *total = cost->fabs;
31786 else if (FLOAT_MODE_P (mode))
31787 /* ??? SSE vector cost should be used here. */
31788 *total = cost->fabs;
31792 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31793 /* ??? SSE cost should be used here. */
31794 *total = cost->fsqrt;
31795 else if (X87_FLOAT_MODE_P (mode))
31796 *total = cost->fsqrt;
31797 else if (FLOAT_MODE_P (mode))
31798 /* ??? SSE vector cost should be used here. */
31799 *total = cost->fsqrt;
31803 if (XINT (x, 1) == UNSPEC_TP)
31810 case VEC_DUPLICATE:
/* ??? Assume all of these vector manipulation patterns are
   recognizable.  In which case they all pretty much have the
   same cost.  */
*total = COSTS_N_INSNS (1);
31824 static int current_machopic_label_num;
31826 /* Given a symbol name and its associated stub, write out the
31827 definition of the stub. */
31830 machopic_output_stub (FILE *file, const char *symb, const char *stub)
31832 unsigned int length;
31833 char *binder_name, *symbol_name, lazy_ptr_name[32];
31834 int label = ++current_machopic_label_num;
31836 /* For 64-bit we shouldn't get here. */
31837 gcc_assert (!TARGET_64BIT);
31839 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
31840 symb = targetm.strip_name_encoding (symb);
31842 length = strlen (stub);
31843 binder_name = XALLOCAVEC (char, length + 32);
31844 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
31846 length = strlen (symb);
31847 symbol_name = XALLOCAVEC (char, length + 32);
31848 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
31850 sprintf (lazy_ptr_name, "L%d$lz", label);
31852 if (MACHOPIC_ATT_STUB)
31853 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
31854 else if (MACHOPIC_PURE)
31855 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
31857 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
31859 fprintf (file, "%s:\n", stub);
31860 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31862 if (MACHOPIC_ATT_STUB)
31864 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
31866 else if (MACHOPIC_PURE)
31869 /* 25-byte PIC stub using "CALL get_pc_thunk". */
31870 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
31871 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
31872 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
31873 label, lazy_ptr_name, label);
31874 fprintf (file, "\tjmp\t*%%ecx\n");
31877 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
31879 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
31880 it needs no stub-binding-helper. */
31881 if (MACHOPIC_ATT_STUB)
31884 fprintf (file, "%s:\n", binder_name);
31888 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
31889 fprintf (file, "\tpushl\t%%ecx\n");
31892 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
31894 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
31896 /* N.B. Keep the correspondence of these
31897 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
31898 old-pic/new-pic/non-pic stubs; altering this will break
31899 compatibility with existing dylibs. */
31902 /* 25-byte PIC stub using "CALL get_pc_thunk". */
31903 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
31906 /* 16-byte -mdynamic-no-pic stub. */
switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
31909 fprintf (file, "%s:\n", lazy_ptr_name);
31910 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31911 fprintf (file, ASM_LONG "%s\n", binder_name);
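/* For illustration, the MACHOPIC_PURE path above emits a stub roughly
   of the form (exact labels and the get_pc_thunk call vary):

	L_foo$stub:
		.indirect_symbol _foo
	LPC$1:	movl	L1$lz-LPC$1(%ecx),%ecx
		jmp	*%ecx

   together with the binder and the lazy symbol pointer emitted
   above.  */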
31913 #endif /* TARGET_MACHO */
/* Order the registers for the register allocator.  */
31918 x86_order_regs_for_local_alloc (void)
31923 /* First allocate the local general purpose registers. */
31924 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
31925 if (GENERAL_REGNO_P (i) && call_used_regs[i])
31926 reg_alloc_order [pos++] = i;
31928 /* Global general purpose registers. */
31929 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
31930 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
31931 reg_alloc_order [pos++] = i;
/* x87 registers come first in case we are doing FP math
   using them.  */
31935 if (!TARGET_SSE_MATH)
31936 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
31937 reg_alloc_order [pos++] = i;
31939 /* SSE registers. */
31940 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
31941 reg_alloc_order [pos++] = i;
31942 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
31943 reg_alloc_order [pos++] = i;
31945 /* x87 registers. */
31946 if (TARGET_SSE_MATH)
31947 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
31948 reg_alloc_order [pos++] = i;
31950 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
31951 reg_alloc_order [pos++] = i;
/* Initialize the rest of the array, as we do not allocate some
   registers at all.  */
31955 while (pos < FIRST_PSEUDO_REGISTER)
31956 reg_alloc_order [pos++] = 0;
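/* E.g. on ia32 this prefers the call-clobbered %eax, %edx and %ecx to
   the call-saved %ebx, %esi, %edi and %ebp, with the vector and x87
   registers ordered as above.  */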
/* Handle a "callee_pop_aggregate_return" attribute; arguments as
   in struct attribute_spec.handler.  */
31962 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
31964 int flags ATTRIBUTE_UNUSED,
31965 bool *no_add_attrs)
31967 if (TREE_CODE (*node) != FUNCTION_TYPE
31968 && TREE_CODE (*node) != METHOD_TYPE
31969 && TREE_CODE (*node) != FIELD_DECL
31970 && TREE_CODE (*node) != TYPE_DECL)
31972 warning (OPT_Wattributes, "%qE attribute only applies to functions",
31974 *no_add_attrs = true;
31979 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
31981 *no_add_attrs = true;
31984 if (is_attribute_p ("callee_pop_aggregate_return", name))
31988 cst = TREE_VALUE (args);
31989 if (TREE_CODE (cst) != INTEGER_CST)
31991 warning (OPT_Wattributes,
31992 "%qE attribute requires an integer constant argument",
31994 *no_add_attrs = true;
31996 else if (compare_tree_int (cst, 0) != 0
31997 && compare_tree_int (cst, 1) != 0)
31999 warning (OPT_Wattributes,
32000 "argument to %qE attribute is neither zero, nor one",
32002 *no_add_attrs = true;
/* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
   struct attribute_spec.handler.  */
32014 ix86_handle_abi_attribute (tree *node, tree name,
32015 tree args ATTRIBUTE_UNUSED,
32016 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32018 if (TREE_CODE (*node) != FUNCTION_TYPE
32019 && TREE_CODE (*node) != METHOD_TYPE
32020 && TREE_CODE (*node) != FIELD_DECL
32021 && TREE_CODE (*node) != TYPE_DECL)
32023 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32025 *no_add_attrs = true;
/* The ms_abi and sysv_abi attributes are mutually exclusive.  */
32030 if (is_attribute_p ("ms_abi", name))
32032 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
32034 error ("ms_abi and sysv_abi attributes are not compatible");
32039 else if (is_attribute_p ("sysv_abi", name))
32041 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
32043 error ("ms_abi and sysv_abi attributes are not compatible");
32052 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32053 struct attribute_spec.handler. */
32055 ix86_handle_struct_attribute (tree *node, tree name,
32056 tree args ATTRIBUTE_UNUSED,
32057 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32060 if (DECL_P (*node))
32062 if (TREE_CODE (*node) == TYPE_DECL)
32063 type = &TREE_TYPE (*node);
32068 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
32070 warning (OPT_Wattributes, "%qE attribute ignored",
32072 *no_add_attrs = true;
32075 else if ((is_attribute_p ("ms_struct", name)
32076 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
32077 || ((is_attribute_p ("gcc_struct", name)
32078 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
32080 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
32082 *no_add_attrs = true;
32089 ix86_handle_fndecl_attribute (tree *node, tree name,
32090 tree args ATTRIBUTE_UNUSED,
32091 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32093 if (TREE_CODE (*node) != FUNCTION_DECL)
32095 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32097 *no_add_attrs = true;
32103 ix86_ms_bitfield_layout_p (const_tree record_type)
32105 return ((TARGET_MS_BITFIELD_LAYOUT
32106 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
32107 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
32110 /* Returns an expression indicating where the this parameter is
32111 located on entry to the FUNCTION. */
32114 x86_this_parameter (tree function)
32116 tree type = TREE_TYPE (function);
32117 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
32122 const int *parm_regs;
32124 if (ix86_function_type_abi (type) == MS_ABI)
32125 parm_regs = x86_64_ms_abi_int_parameter_registers;
32127 parm_regs = x86_64_int_parameter_registers;
32128 return gen_rtx_REG (DImode, parm_regs[aggr]);
32131 nregs = ix86_function_regparm (type, function);
32133 if (nregs > 0 && !stdarg_p (type))
32136 unsigned int ccvt = ix86_get_callcvt (type);
32138 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
32139 regno = aggr ? DX_REG : CX_REG;
32140 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
32144 return gen_rtx_MEM (SImode,
32145 plus_constant (stack_pointer_rtx, 4));
32154 return gen_rtx_MEM (SImode,
32155 plus_constant (stack_pointer_rtx, 4));
32158 return gen_rtx_REG (SImode, regno);
32161 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
32164 /* Determine whether x86_output_mi_thunk can succeed. */
32167 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
32168 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
32169 HOST_WIDE_INT vcall_offset, const_tree function)
32171 /* 64-bit can handle anything. */
32175 /* For 32-bit, everything's fine if we have one free register. */
32176 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
32179 /* Need a free register for vcall_offset. */
32183 /* Need a free register for GOT references. */
32184 if (flag_pic && !targetm.binds_local_p (function))
32187 /* Otherwise ok. */
32191 /* Output the assembler code for a thunk function. THUNK_DECL is the
32192 declaration for the thunk function itself, FUNCTION is the decl for
32193 the target function. DELTA is an immediate constant offset to be
32194 added to THIS. If VCALL_OFFSET is nonzero, the word at
32195 *(*this + vcall_offset) should be added to THIS. */
32198 x86_output_mi_thunk (FILE *file,
32199 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
32200 HOST_WIDE_INT vcall_offset, tree function)
32202 rtx this_param = x86_this_parameter (function);
32203 rtx this_reg, tmp, fnaddr;
32204 unsigned int tmp_regno;
32207 tmp_regno = R10_REG;
32210 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
32211 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
32212 tmp_regno = AX_REG;
32214 tmp_regno = CX_REG;
32217 emit_note (NOTE_INSN_PROLOGUE_END);
32219 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
32220 pull it in now and let DELTA benefit. */
32221 if (REG_P (this_param))
32222 this_reg = this_param;
32223 else if (vcall_offset)
32225 /* Put the this parameter into %eax. */
32226 this_reg = gen_rtx_REG (Pmode, AX_REG);
32227 emit_move_insn (this_reg, this_param);
32230 this_reg = NULL_RTX;
32232 /* Adjust the this parameter by a fixed constant. */
32235 rtx delta_rtx = GEN_INT (delta);
32236 rtx delta_dst = this_reg ? this_reg : this_param;
32240 if (!x86_64_general_operand (delta_rtx, Pmode))
32242 tmp = gen_rtx_REG (Pmode, tmp_regno);
32243 emit_move_insn (tmp, delta_rtx);
32248 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
32251 /* Adjust the this parameter by a value stored in the vtable. */
32254 rtx vcall_addr, vcall_mem, this_mem;
32256 tmp = gen_rtx_REG (Pmode, tmp_regno);
32258 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
32259 if (Pmode != ptr_mode)
32260 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
32261 emit_move_insn (tmp, this_mem);
32263 /* Adjust the this parameter. */
32264 vcall_addr = plus_constant (tmp, vcall_offset);
32266 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
32268 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
32269 emit_move_insn (tmp2, GEN_INT (vcall_offset));
32270 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
32273 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
32274 if (Pmode != ptr_mode)
32275 emit_insn (gen_addsi_1_zext (this_reg,
32276 gen_rtx_REG (ptr_mode,
32280 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
32283 /* If necessary, drop THIS back to its stack slot. */
32284 if (this_reg && this_reg != this_param)
32285 emit_move_insn (this_param, this_reg);
32287 fnaddr = XEXP (DECL_RTL (function), 0);
32290 if (!flag_pic || targetm.binds_local_p (function)
32291 || cfun->machine->call_abi == MS_ABI)
32295 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
32296 tmp = gen_rtx_CONST (Pmode, tmp);
32297 fnaddr = gen_rtx_MEM (Pmode, tmp);
32302 if (!flag_pic || targetm.binds_local_p (function))
32305 else if (TARGET_MACHO)
32307 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
32308 fnaddr = XEXP (fnaddr, 0);
32310 #endif /* TARGET_MACHO */
32313 tmp = gen_rtx_REG (Pmode, CX_REG);
32314 output_set_got (tmp, NULL_RTX);
32316 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
32317 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
32318 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
32322 /* Our sibling call patterns do not allow memories, because we have no
32323 predicate that can distinguish between frame and non-frame memory.
32324 For our purposes here, we can get away with (ab)using a jump pattern,
32325 because we're going to do no optimization. */
32326 if (MEM_P (fnaddr))
32327 emit_jump_insn (gen_indirect_jump (fnaddr));
32330 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
32331 fnaddr = legitimize_pic_address (fnaddr,
32332 gen_rtx_REG (Pmode, tmp_regno));
32334 if (!sibcall_insn_operand (fnaddr, Pmode))
32336 tmp = gen_rtx_REG (Pmode, tmp_regno);
32337 if (GET_MODE (fnaddr) != Pmode)
32338 fnaddr = gen_rtx_ZERO_EXTEND (Pmode, fnaddr);
32339 emit_move_insn (tmp, fnaddr);
32343 tmp = gen_rtx_MEM (QImode, fnaddr);
32344 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
32345 tmp = emit_call_insn (tmp);
32346 SIBLING_CALL_P (tmp) = 1;
32350 /* Emit just enough of rest_of_compilation to get the insns emitted.
32351 Note that use_thunk calls assemble_start_function et al. */
32352 tmp = get_insns ();
32353 insn_locators_alloc ();
32354 shorten_branches (tmp);
32355 final_start_function (tmp, file, 1);
32356 final (tmp, file, 1);
32357 final_end_function ();
32361 x86_file_start (void)
32363 default_file_start ();
32365 darwin_file_start ();
32367 if (X86_FILE_START_VERSION_DIRECTIVE)
32368 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
32369 if (X86_FILE_START_FLTUSED)
32370 fputs ("\t.global\t__fltused\n", asm_out_file);
32371 if (ix86_asm_dialect == ASM_INTEL)
32372 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
32376 x86_field_alignment (tree field, int computed)
32378 enum machine_mode mode;
32379 tree type = TREE_TYPE (field);
32381 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
32383 mode = TYPE_MODE (strip_array_types (type));
32384 if (mode == DFmode || mode == DCmode
32385 || GET_MODE_CLASS (mode) == MODE_INT
32386 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
32387 return MIN (32, computed);
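/* E.g. on ia32 without -malign-double a double field is aligned to
   32 bits, so in struct { char c; double d; } the member d is placed
   at offset 4 rather than 8.  */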
32391 /* Output assembler code to FILE to increment profiler label # LABELNO
32392 for profiling a function entry. */
32394 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
32396 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
32401 #ifndef NO_PROFILE_COUNTERS
32402 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
32405 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
32406 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
32408 fprintf (file, "\tcall\t%s\n", mcount_name);
32412 #ifndef NO_PROFILE_COUNTERS
32413 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
32416 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
32420 #ifndef NO_PROFILE_COUNTERS
32421 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
32424 fprintf (file, "\tcall\t%s\n", mcount_name);
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   the vast majority of cases.  */
32434 min_insn_size (rtx insn)
32438 if (!INSN_P (insn) || !active_insn_p (insn))
/* Discard alignments we've emitted, and jump instructions.  */
32442 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
32443 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
32445 if (JUMP_TABLE_DATA_P (insn))
/* Important case - calls are always 5 bytes.
   It is common to have many calls in a row.  */
32451 && symbolic_reference_mentioned_p (PATTERN (insn))
32452 && !SIBLING_CALL_P (insn))
32454 len = get_attr_length (insn);
32458 /* For normal instructions we rely on get_attr_length being exact,
32459 with a few exceptions. */
32460 if (!JUMP_P (insn))
32462 enum attr_type type = get_attr_type (insn);
32467 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
32468 || asm_noperands (PATTERN (insn)) >= 0)
32475 /* Otherwise trust get_attr_length. */
32479 l = get_attr_length_address (insn);
32480 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
32489 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
   16 byte window.  */
32495 ix86_avoid_jump_mispredicts (void)
32497 rtx insn, start = get_insns ();
32498 int nbytes = 0, njumps = 0;
/* Look for all minimal intervals of instructions containing 4 jumps.
   The intervals are bounded by START and INSN.  NBYTES is the total
   size of the instructions in the interval including INSN and not
   including START.  When NBYTES is smaller than 16 bytes, it is
   possible that the end of START and INSN ends up in the same 16 byte
   page.

   The smallest offset in the page INSN can start at is the case where
   START ends at offset 0.  The offset of INSN is then NBYTES -
   sizeof (INSN).  We add p2align to the 16 byte window with max skip
   15 - NBYTES + sizeof (INSN).  */
32511 for (insn = start; insn; insn = NEXT_INSN (insn))
32515 if (LABEL_P (insn))
32517 int align = label_to_alignment (insn);
32518 int max_skip = label_to_max_skip (insn);
32522 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
32523 already in the current 16 byte page, because otherwise
32524 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
32525 bytes to reach 16 byte boundary. */
32527 || (align <= 3 && max_skip != (1 << align) - 1))
32530 fprintf (dump_file, "Label %i with max_skip %i\n",
32531 INSN_UID (insn), max_skip);
32534 while (nbytes + max_skip >= 16)
32536 start = NEXT_INSN (start);
32537 if ((JUMP_P (start)
32538 && GET_CODE (PATTERN (start)) != ADDR_VEC
32539 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
32541 njumps--, isjump = 1;
32544 nbytes -= min_insn_size (start);
32550 min_size = min_insn_size (insn);
32551 nbytes += min_size;
32553 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
32554 INSN_UID (insn), min_size);
32556 && GET_CODE (PATTERN (insn)) != ADDR_VEC
32557 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
32565 start = NEXT_INSN (start);
32566 if ((JUMP_P (start)
32567 && GET_CODE (PATTERN (start)) != ADDR_VEC
32568 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
32570 njumps--, isjump = 1;
32573 nbytes -= min_insn_size (start);
32575 gcc_assert (njumps >= 0);
32577 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
32578 INSN_UID (start), INSN_UID (insn), nbytes);
32580 if (njumps == 3 && isjump && nbytes < 16)
32582 int padsize = 15 - nbytes + min_insn_size (insn);
32585 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
32586 INSN_UID (insn), padsize);
32587 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
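/* E.g. with nbytes == 12 and a 2 byte INSN, padsize is
   15 - 12 + 2 = 5, enough padding to push INSN past the 16 byte
   window containing the other three jumps.  */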
/* AMD Athlon works faster
   when RET is not the destination of a conditional jump or is not
   directly preceded by another jump instruction.  We avoid the penalty
   by inserting a NOP just before the RET instructions in such cases.  */
32598 ix86_pad_returns (void)
32603 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
32605 basic_block bb = e->src;
32606 rtx ret = BB_END (bb);
32608 bool replace = false;
32610 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
32611 || optimize_bb_for_size_p (bb))
32613 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
32614 if (active_insn_p (prev) || LABEL_P (prev))
32616 if (prev && LABEL_P (prev))
32621 FOR_EACH_EDGE (e, ei, bb->preds)
32622 if (EDGE_FREQUENCY (e) && e->src->index >= 0
32623 && !(e->flags & EDGE_FALLTHRU))
32628 prev = prev_active_insn (ret);
32630 && ((JUMP_P (prev) && any_condjump_p (prev))
/* Empty functions get a branch mispredict even when
   the jump destination is not visible to us.  */
32635 if (!prev && !optimize_function_for_size_p (cfun))
32640 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
32646 /* Count the minimum number of instructions in BB. Return 4 if the
32647 number of instructions >= 4. */
32650 ix86_count_insn_bb (basic_block bb)
32653 int insn_count = 0;
32655 /* Count number of instructions in this block. Return 4 if the number
32656 of instructions >= 4. */
32657 FOR_BB_INSNS (bb, insn)
/* This only happens in exit blocks.  */
32661 && ANY_RETURN_P (PATTERN (insn)))
32664 if (NONDEBUG_INSN_P (insn)
32665 && GET_CODE (PATTERN (insn)) != USE
32666 && GET_CODE (PATTERN (insn)) != CLOBBER)
32669 if (insn_count >= 4)
32678 /* Count the minimum number of instructions in code path in BB.
32679 Return 4 if the number of instructions >= 4. */
32682 ix86_count_insn (basic_block bb)
32686 int min_prev_count;
32688 /* Only bother counting instructions along paths with no
32689 more than 2 basic blocks between entry and exit. Given
32690 that BB has an edge to exit, determine if a predecessor
32691 of BB has an edge from entry. If so, compute the number
32692 of instructions in the predecessor block. If there
32693 happen to be multiple such blocks, compute the minimum. */
32694 min_prev_count = 4;
32695 FOR_EACH_EDGE (e, ei, bb->preds)
32698 edge_iterator prev_ei;
32700 if (e->src == ENTRY_BLOCK_PTR)
32702 min_prev_count = 0;
32705 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
32707 if (prev_e->src == ENTRY_BLOCK_PTR)
32709 int count = ix86_count_insn_bb (e->src);
32710 if (count < min_prev_count)
32711 min_prev_count = count;
32717 if (min_prev_count < 4)
32718 min_prev_count += ix86_count_insn_bb (bb);
32720 return min_prev_count;
/* Pad short function to 4 instructions.  */
32726 ix86_pad_short_function (void)
32731 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
32733 rtx ret = BB_END (e->src);
32734 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
32736 int insn_count = ix86_count_insn (e->src);
32738 /* Pad short function. */
32739 if (insn_count < 4)
32743 /* Find epilogue. */
32746 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
32747 insn = PREV_INSN (insn);
32752 /* Two NOPs count as one instruction. */
32753 insn_count = 2 * (4 - insn_count);
32754 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
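/* E.g. a function with a single-insn body has insn_count == 1, so
   2 * (4 - 1) = 6 NOPs are inserted before the epilogue, each pair
   counting as one instruction as noted above.  */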
32760 /* Implement machine specific optimizations. We implement padding of returns
32761 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
32765 /* We are freeing block_for_insn in the toplev to keep compatibility
32766 with old MDEP_REORGS that are not CFG based. Recompute it now. */
32767 compute_bb_for_insn ();
32769 /* Run the vzeroupper optimization if needed. */
32770 if (TARGET_VZEROUPPER)
32771 move_or_delete_vzeroupper ();
32773 if (optimize && optimize_function_for_speed_p (cfun))
32775 if (TARGET_PAD_SHORT_FUNCTION)
32776 ix86_pad_short_function ();
32777 else if (TARGET_PAD_RETURNS)
32778 ix86_pad_returns ();
32779 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
32780 if (TARGET_FOUR_JUMP_LIMIT)
32781 ix86_avoid_jump_mispredicts ();
/* Return nonzero when a QImode register that must be represented via a
   REX prefix is used.  */
32789 x86_extended_QIreg_mentioned_p (rtx insn)
32792 extract_insn_cached (insn);
32793 for (i = 0; i < recog_data.n_operands; i++)
32794 if (REG_P (recog_data.operand[i])
32795 && REGNO (recog_data.operand[i]) > BX_REG)
/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
32803 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
32805 unsigned int regno;
32808 regno = REGNO (*p);
32809 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
/* Return true when INSN mentions a register that must be encoded using a
   REX prefix.  */
32815 x86_extended_reg_mentioned_p (rtx insn)
32817 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
32818 extended_reg_mentioned_1, NULL);
32821 /* If profitable, negate (without causing overflow) integer constant
32822 of mode MODE at location LOC. Return true in this case. */
32824 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
32828 if (!CONST_INT_P (*loc))
32834 /* DImode x86_64 constants must fit in 32 bits. */
32835 gcc_assert (x86_64_immediate_operand (*loc, mode));
32846 gcc_unreachable ();
32849 /* Avoid overflows. */
32850 if (mode_signbit_p (mode, *loc))
32853 val = INTVAL (*loc);
/* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
   Exception: -128 encodes smaller than 128, so swap the sign and the
   operation.  */
32857 if ((val < 0 && val != -128)
32860 *loc = GEN_INT (-val);
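/* E.g. "addl $-4, %eax" is rewritten as "subl $4, %eax", while
   "addl $-128, %eax" is kept as is, because -128 fits in a
   sign-extended imm8 but +128 does not.  */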
32867 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
32868 optabs would emit if we didn't have TFmode patterns. */
32871 x86_emit_floatuns (rtx operands[2])
32873 rtx neglab, donelab, i0, i1, f0, in, out;
32874 enum machine_mode mode, inmode;
32876 inmode = GET_MODE (operands[1]);
32877 gcc_assert (inmode == SImode || inmode == DImode);
32880 in = force_reg (inmode, operands[1]);
32881 mode = GET_MODE (out);
32882 neglab = gen_label_rtx ();
32883 donelab = gen_label_rtx ();
32884 f0 = gen_reg_rtx (mode);
32886 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
32888 expand_float (out, in, 0);
32890 emit_jump_insn (gen_jump (donelab));
32893 emit_label (neglab);
32895 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
32897 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
32899 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
32901 expand_float (f0, i0, 0);
32903 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
32905 emit_label (donelab);
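/* For illustration, the sequence emitted above is equivalent to this C
   sketch (assuming a DImode input converted to double):

       double u64_to_f (unsigned long long u)
       {
         if ((long long) u >= 0)
           return (double) (long long) u;
         unsigned long long i = (u >> 1) | (u & 1);
         double f = (double) (long long) i;
         return f + f;
       }

   i.e. halve the value while keeping a sticky low bit, convert it as
   a signed number, then double the result.  */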
32908 /* AVX2 does support 32-byte integer vector operations,
32909 thus the longest vector we are faced with is V32QImode. */
32910 #define MAX_VECT_LEN 32
32912 struct expand_vec_perm_d
32914 rtx target, op0, op1;
32915 unsigned char perm[MAX_VECT_LEN];
32916 enum machine_mode vmode;
32917 unsigned char nelt;
32921 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
32922 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
32924 /* Get a vector mode of the same size as the original but with elements
32925 twice as wide. This is only guaranteed to apply to integral vectors. */
32927 static inline enum machine_mode
32928 get_mode_wider_vector (enum machine_mode o)
32930 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
32931 enum machine_mode n = GET_MODE_WIDER_MODE (o);
32932 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
32933 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
32937 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
32938 with all elements equal to VAR. Return true if successful. */
32941 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
32942 rtx target, rtx val)
32965 /* First attempt to recognize VAL as-is. */
32966 dup = gen_rtx_VEC_DUPLICATE (mode, val);
32967 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
32968 if (recog_memoized (insn) < 0)
32971 /* If that fails, force VAL into a register. */
32974 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
32975 seq = get_insns ();
32978 emit_insn_before (seq, insn);
32980 ok = recog_memoized (insn) >= 0;
32989 if (TARGET_SSE || TARGET_3DNOW_A)
32993 val = gen_lowpart (SImode, val);
32994 x = gen_rtx_TRUNCATE (HImode, val);
32995 x = gen_rtx_VEC_DUPLICATE (mode, x);
32996 emit_insn (gen_rtx_SET (VOIDmode, target, x));
33009 struct expand_vec_perm_d dperm;
33013 memset (&dperm, 0, sizeof (dperm));
33014 dperm.target = target;
33015 dperm.vmode = mode;
33016 dperm.nelt = GET_MODE_NUNITS (mode);
33017 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
33019 /* Extend to SImode using a paradoxical SUBREG. */
33020 tmp1 = gen_reg_rtx (SImode);
33021 emit_move_insn (tmp1, gen_lowpart (SImode, val));
33023 /* Insert the SImode value as low element of a V4SImode vector. */
33024 tmp2 = gen_lowpart (V4SImode, dperm.op0);
33025 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
33027 ok = (expand_vec_perm_1 (&dperm)
33028 || expand_vec_perm_broadcast_1 (&dperm));
33040 /* Replicate the value once into the next wider mode and recurse. */
33042 enum machine_mode smode, wsmode, wvmode;
33045 smode = GET_MODE_INNER (mode);
33046 wvmode = get_mode_wider_vector (mode);
33047 wsmode = GET_MODE_INNER (wvmode);
33049 val = convert_modes (wsmode, smode, val, true);
33050 x = expand_simple_binop (wsmode, ASHIFT, val,
33051 GEN_INT (GET_MODE_BITSIZE (smode)),
33052 NULL_RTX, 1, OPTAB_LIB_WIDEN);
33053 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
33055 x = gen_lowpart (wvmode, target);
33056 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
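/* A sketch of the widening step above for QImode elements (types
   illustrative): the value is replicated inside the next wider scalar,
   and the recursion then broadcasts that wider scalar:

     unsigned short widen_qi (unsigned char v)
     {
       return ((unsigned short) v << 8) | v;
     }

   The recursion then handles the doubled element width.  */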
33064 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
33065 rtx x = gen_reg_rtx (hvmode);
33067 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
33070 x = gen_rtx_VEC_CONCAT (mode, x, x);
33071 emit_insn (gen_rtx_SET (VOIDmode, target, x));
33080 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33081 whose ONE_VAR element is VAR, and whose other elements are zero.  Return
      true if successful.  */
33085 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
33086 rtx target, rtx var, int one_var)
33088 enum machine_mode vsimode;
33091 bool use_vector_set = false;
33096 /* For SSE4.1, we normally use vector set.  But if the second
33097    element is zero and inter-unit moves are OK, we use movq
      instead.  */
33099 use_vector_set = (TARGET_64BIT
33101 && !(TARGET_INTER_UNIT_MOVES
33107 use_vector_set = TARGET_SSE4_1;
33110 use_vector_set = TARGET_SSE2;
33113 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
33120 use_vector_set = TARGET_AVX;
33123 /* Use ix86_expand_vector_set in 64bit mode only. */
33124 use_vector_set = TARGET_AVX && TARGET_64BIT;
33130 if (use_vector_set)
33132 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
33133 var = force_reg (GET_MODE_INNER (mode), var);
33134 ix86_expand_vector_set (mmx_ok, target, var, one_var);
33150 var = force_reg (GET_MODE_INNER (mode), var);
33151 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
33152 emit_insn (gen_rtx_SET (VOIDmode, target, x));
33157 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
33158 new_target = gen_reg_rtx (mode);
33160 new_target = target;
33161 var = force_reg (GET_MODE_INNER (mode), var);
33162 x = gen_rtx_VEC_DUPLICATE (mode, var);
33163 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
33164 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
33167 /* We need to shuffle the value to the correct position, so
33168 create a new pseudo to store the intermediate result. */
33170 /* With SSE2, we can use the integer shuffle insns. */
33171 if (mode != V4SFmode && TARGET_SSE2)
33173 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
33175 GEN_INT (one_var == 1 ? 0 : 1),
33176 GEN_INT (one_var == 2 ? 0 : 1),
33177 GEN_INT (one_var == 3 ? 0 : 1)));
33178 if (target != new_target)
33179 emit_move_insn (target, new_target);
33183 /* Otherwise convert the intermediate result to V4SFmode and
33184 use the SSE1 shuffle instructions. */
33185 if (mode != V4SFmode)
33187 tmp = gen_reg_rtx (V4SFmode);
33188 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
33193 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
33195 GEN_INT (one_var == 1 ? 0 : 1),
33196 GEN_INT (one_var == 2 ? 0+4 : 1+4),
33197 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
33199 if (mode != V4SFmode)
33200 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
33201 else if (tmp != target)
33202 emit_move_insn (target, tmp);
33204 else if (target != new_target)
33205 emit_move_insn (target, new_target);
33210 vsimode = V4SImode;
33216 vsimode = V2SImode;
33222 /* Zero extend the variable element to SImode and recurse. */
33223 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
33225 x = gen_reg_rtx (vsimode);
33226 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
33228 gcc_unreachable ();
33230 emit_move_insn (target, gen_lowpart (mode, x));
33238 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33239 consisting of the values in VALS. It is known that all elements
33240 except ONE_VAR are constants. Return true if successful. */
33243 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
33244 rtx target, rtx vals, int one_var)
33246 rtx var = XVECEXP (vals, 0, one_var);
33247 enum machine_mode wmode;
33250 const_vec = copy_rtx (vals);
33251 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
33252 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
33260 /* For the two element vectors, it's just as easy to use
33261 the general case. */
33265 /* Use ix86_expand_vector_set in 64bit mode only. */
33288 /* There's no way to set one QImode entry easily. Combine
33289 the variable value with its adjacent constant value, and
33290 promote to an HImode set. */
33291 x = XVECEXP (vals, 0, one_var ^ 1);
33294 var = convert_modes (HImode, QImode, var, true);
33295 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
33296 NULL_RTX, 1, OPTAB_LIB_WIDEN);
33297 x = GEN_INT (INTVAL (x) & 0xff);
33301 var = convert_modes (HImode, QImode, var, true);
33302 x = gen_int_mode (INTVAL (x) << 8, HImode);
33304 if (x != const0_rtx)
33305 var = expand_simple_binop (HImode, IOR, var, x, var,
33306 1, OPTAB_LIB_WIDEN);
33308 x = gen_reg_rtx (wmode);
33309 emit_move_insn (x, gen_lowpart (wmode, const_vec));
33310 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
33312 emit_move_insn (target, gen_lowpart (mode, x));
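/* Sketch of the pairing above (illustrative): the variable byte and its
   constant neighbor are packed into one HImode value, which is then
   stored with a single HImode vector set:

     unsigned short pack_pair (unsigned char lo, unsigned char hi)
     {
       return ((unsigned short) hi << 8) | lo;
     }

   Whether VAR supplies the low or the high byte follows the parity of
   ONE_VAR, as the two branches above show.  */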
33319 emit_move_insn (target, const_vec);
33320 ix86_expand_vector_set (mmx_ok, target, var, one_var);
33324 /* A subroutine of ix86_expand_vector_init_general. Use vector
33325 concatenate to handle the most general case: all values variable,
33326 and none identical. */
33329 ix86_expand_vector_init_concat (enum machine_mode mode,
33330 rtx target, rtx *ops, int n)
33332 enum machine_mode cmode, hmode = VOIDmode;
33333 rtx first[8], second[4];
33373 gcc_unreachable ();
33376 if (!register_operand (ops[1], cmode))
33377 ops[1] = force_reg (cmode, ops[1]);
33378 if (!register_operand (ops[0], cmode))
33379 ops[0] = force_reg (cmode, ops[0]);
33380 emit_insn (gen_rtx_SET (VOIDmode, target,
33381 gen_rtx_VEC_CONCAT (mode, ops[0],
33401 gcc_unreachable ();
33417 gcc_unreachable ();
33422 /* FIXME: We process inputs backward to help RA. PR 36222. */
33425 for (; i > 0; i -= 2, j--)
33427 first[j] = gen_reg_rtx (cmode);
33428 v = gen_rtvec (2, ops[i - 1], ops[i]);
33429 ix86_expand_vector_init (false, first[j],
33430 gen_rtx_PARALLEL (cmode, v));
33436 gcc_assert (hmode != VOIDmode);
33437 for (i = j = 0; i < n; i += 2, j++)
33439 second[j] = gen_reg_rtx (hmode);
33440 ix86_expand_vector_init_concat (hmode, second [j],
33444 ix86_expand_vector_init_concat (mode, target, second, n);
33447 ix86_expand_vector_init_concat (mode, target, first, n);
33451 gcc_unreachable ();
33455 /* A subroutine of ix86_expand_vector_init_general. Use vector
33456 interleave to handle the most general case: all values variable,
33457 and none identical. */
33460 ix86_expand_vector_init_interleave (enum machine_mode mode,
33461 rtx target, rtx *ops, int n)
33463 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
33466 rtx (*gen_load_even) (rtx, rtx, rtx);
33467 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
33468 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
33473 gen_load_even = gen_vec_setv8hi;
33474 gen_interleave_first_low = gen_vec_interleave_lowv4si;
33475 gen_interleave_second_low = gen_vec_interleave_lowv2di;
33476 inner_mode = HImode;
33477 first_imode = V4SImode;
33478 second_imode = V2DImode;
33479 third_imode = VOIDmode;
33482 gen_load_even = gen_vec_setv16qi;
33483 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
33484 gen_interleave_second_low = gen_vec_interleave_lowv4si;
33485 inner_mode = QImode;
33486 first_imode = V8HImode;
33487 second_imode = V4SImode;
33488 third_imode = V2DImode;
33491 gcc_unreachable ();
33494 for (i = 0; i < n; i++)
33496 /* Extend the odd element to SImode using a paradoxical SUBREG.  */
33497 op0 = gen_reg_rtx (SImode);
33498 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
33500 /* Insert the SImode value as low element of V4SImode vector. */
33501 op1 = gen_reg_rtx (V4SImode);
33502 op0 = gen_rtx_VEC_MERGE (V4SImode,
33503 gen_rtx_VEC_DUPLICATE (V4SImode,
33505 CONST0_RTX (V4SImode),
33507 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
33509 /* Cast the V4SImode vector back to a vector of the original mode.  */
33510 op0 = gen_reg_rtx (mode);
33511 emit_move_insn (op0, gen_lowpart (mode, op1));
33513 /* Load even elements into the second position.  */
33514 emit_insn (gen_load_even (op0,
33515 force_reg (inner_mode,
33519 /* Cast vector to FIRST_IMODE vector. */
33520 ops[i] = gen_reg_rtx (first_imode);
33521 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
33524 /* Interleave low FIRST_IMODE vectors. */
33525 for (i = j = 0; i < n; i += 2, j++)
33527 op0 = gen_reg_rtx (first_imode);
33528 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
33530 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
33531 ops[j] = gen_reg_rtx (second_imode);
33532 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
33535 /* Interleave low SECOND_IMODE vectors. */
33536 switch (second_imode)
33539 for (i = j = 0; i < n / 2; i += 2, j++)
33541 op0 = gen_reg_rtx (second_imode);
33542 emit_insn (gen_interleave_second_low (op0, ops[i],
33545 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector.  */
33547 ops[j] = gen_reg_rtx (third_imode);
33548 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
33550 second_imode = V2DImode;
33551 gen_interleave_second_low = gen_vec_interleave_lowv2di;
33555 op0 = gen_reg_rtx (second_imode);
33556 emit_insn (gen_interleave_second_low (op0, ops[0],
33559 /* Cast the SECOND_IMODE vector back to a vector of the original
      mode.  */
33561 emit_insn (gen_rtx_SET (VOIDmode, target,
33562 gen_lowpart (mode, op0)));
33566 gcc_unreachable ();
33570 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
33571 all values variable, and none identical. */
33574 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
33575 rtx target, rtx vals)
33577 rtx ops[32], op0, op1;
33578 enum machine_mode half_mode = VOIDmode;
33585 if (!mmx_ok && !TARGET_SSE)
33597 n = GET_MODE_NUNITS (mode);
33598 for (i = 0; i < n; i++)
33599 ops[i] = XVECEXP (vals, 0, i);
33600 ix86_expand_vector_init_concat (mode, target, ops, n);
33604 half_mode = V16QImode;
33608 half_mode = V8HImode;
33612 n = GET_MODE_NUNITS (mode);
33613 for (i = 0; i < n; i++)
33614 ops[i] = XVECEXP (vals, 0, i);
33615 op0 = gen_reg_rtx (half_mode);
33616 op1 = gen_reg_rtx (half_mode);
33617 ix86_expand_vector_init_interleave (half_mode, op0, ops,
33619 ix86_expand_vector_init_interleave (half_mode, op1,
33620 &ops [n >> 1], n >> 2);
33621 emit_insn (gen_rtx_SET (VOIDmode, target,
33622 gen_rtx_VEC_CONCAT (mode, op0, op1)));
33626 if (!TARGET_SSE4_1)
33634 /* Don't use ix86_expand_vector_init_interleave if we can't
33635 move from GPR to SSE register directly. */
33636 if (!TARGET_INTER_UNIT_MOVES)
33639 n = GET_MODE_NUNITS (mode);
33640 for (i = 0; i < n; i++)
33641 ops[i] = XVECEXP (vals, 0, i);
33642 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
33650 gcc_unreachable ();
33654 int i, j, n_elts, n_words, n_elt_per_word;
33655 enum machine_mode inner_mode;
33656 rtx words[4], shift;
33658 inner_mode = GET_MODE_INNER (mode);
33659 n_elts = GET_MODE_NUNITS (mode);
33660 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
33661 n_elt_per_word = n_elts / n_words;
33662 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
33664 for (i = 0; i < n_words; ++i)
33666 rtx word = NULL_RTX;
33668 for (j = 0; j < n_elt_per_word; ++j)
33670 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
33671 elt = convert_modes (word_mode, inner_mode, elt, true);
33677 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
33678 word, 1, OPTAB_LIB_WIDEN);
33679 word = expand_simple_binop (word_mode, IOR, word, elt,
33680 word, 1, OPTAB_LIB_WIDEN);
33688 emit_move_insn (target, gen_lowpart (mode, words[0]));
33689 else if (n_words == 2)
33691 rtx tmp = gen_reg_rtx (mode);
33692 emit_clobber (tmp);
33693 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
33694 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
33695 emit_move_insn (target, tmp);
33697 else if (n_words == 4)
33699 rtx tmp = gen_reg_rtx (V4SImode);
33700 gcc_assert (word_mode == SImode);
33701 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
33702 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
33703 emit_move_insn (target, gen_lowpart (mode, tmp));
33706 gcc_unreachable ();
33710 /* Initialize vector TARGET via VALS. Suppress the use of MMX
33711 instructions unless MMX_OK is true. */
33714 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
33716 enum machine_mode mode = GET_MODE (target);
33717 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33718 int n_elts = GET_MODE_NUNITS (mode);
33719 int n_var = 0, one_var = -1;
33720 bool all_same = true, all_const_zero = true;
33724 for (i = 0; i < n_elts; ++i)
33726 x = XVECEXP (vals, 0, i);
33727 if (!(CONST_INT_P (x)
33728 || GET_CODE (x) == CONST_DOUBLE
33729 || GET_CODE (x) == CONST_FIXED))
33730 n_var++, one_var = i;
33731 else if (x != CONST0_RTX (inner_mode))
33732 all_const_zero = false;
33733 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
33737 /* Constants are best loaded from the constant pool. */
33740 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
33744 /* If all values are identical, broadcast the value. */
33746 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
33747 XVECEXP (vals, 0, 0)))
33750 /* Values where only one field is non-constant are best loaded from
33751 the pool and overwritten via move later. */
33755 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
33756 XVECEXP (vals, 0, one_var),
33760 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
33764 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
33768 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
33770 enum machine_mode mode = GET_MODE (target);
33771 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33772 enum machine_mode half_mode;
33773 bool use_vec_merge = false;
33775 static rtx (*gen_extract[6][2]) (rtx, rtx)
33777 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
33778 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
33779 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
33780 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
33781 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
33782 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
33784 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
33786 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
33787 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
33788 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
33789 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
33790 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
33791 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
33801 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
33802 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
33804 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
33806 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
33807 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33813 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
33817 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
33818 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
33820 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
33822 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
33823 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33830 /* For the two element vectors, we implement a VEC_CONCAT with
33831 the extraction of the other element. */
33833 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
33834 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
33837 op0 = val, op1 = tmp;
33839 op0 = tmp, op1 = val;
33841 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
33842 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33847 use_vec_merge = TARGET_SSE4_1;
33854 use_vec_merge = true;
33858 /* tmp = target = A B C D */
33859 tmp = copy_to_reg (target);
33860 /* target = A A B B */
33861 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
33862 /* target = X A B B */
33863 ix86_expand_vector_set (false, target, val, 0);
33864 /* target = A X C D */
33865 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33866 const1_rtx, const0_rtx,
33867 GEN_INT (2+4), GEN_INT (3+4)));
33871 /* tmp = target = A B C D */
33872 tmp = copy_to_reg (target);
33873 /* tmp = X B C D */
33874 ix86_expand_vector_set (false, tmp, val, 0);
33875 /* target = A B X D */
33876 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33877 const0_rtx, const1_rtx,
33878 GEN_INT (0+4), GEN_INT (3+4)));
33882 /* tmp = target = A B C D */
33883 tmp = copy_to_reg (target);
33884 /* tmp = X B C D */
33885 ix86_expand_vector_set (false, tmp, val, 0);
33886 /* target = A B C X */
33887 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33888 const0_rtx, const1_rtx,
33889 GEN_INT (2+4), GEN_INT (0+4)));
33893 gcc_unreachable ();
33898 use_vec_merge = TARGET_SSE4_1;
33902 /* Element 0 handled by vec_merge below. */
33905 use_vec_merge = true;
33911 /* With SSE2, use integer shuffles to swap element 0 and ELT,
33912 store into element 0, then shuffle them back. */
33916 order[0] = GEN_INT (elt);
33917 order[1] = const1_rtx;
33918 order[2] = const2_rtx;
33919 order[3] = GEN_INT (3);
33920 order[elt] = const0_rtx;
33922 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
33923 order[1], order[2], order[3]));
33925 ix86_expand_vector_set (false, target, val, 0);
33927 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
33928 order[1], order[2], order[3]));
33932 /* For SSE1, we have to reuse the V4SF code. */
33933 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
33934 gen_lowpart (SFmode, val), elt);
33939 use_vec_merge = TARGET_SSE2;
33942 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
33946 use_vec_merge = TARGET_SSE4_1;
33953 half_mode = V16QImode;
33959 half_mode = V8HImode;
33965 half_mode = V4SImode;
33971 half_mode = V2DImode;
33977 half_mode = V4SFmode;
33983 half_mode = V2DFmode;
33989 /* Compute offset. */
33993 gcc_assert (i <= 1);
33995 /* Extract the half. */
33996 tmp = gen_reg_rtx (half_mode);
33997 emit_insn (gen_extract[j][i] (tmp, target));
33999 /* Put val in tmp at elt. */
34000 ix86_expand_vector_set (false, tmp, val, elt);
34003 emit_insn (gen_insert[j][i] (target, target, tmp));
34012 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
34013 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
34014 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
34018 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
34020 emit_move_insn (mem, target);
34022 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
34023 emit_move_insn (tmp, val);
34025 emit_move_insn (target, mem);
34030 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
34032 enum machine_mode mode = GET_MODE (vec);
34033 enum machine_mode inner_mode = GET_MODE_INNER (mode);
34034 bool use_vec_extr = false;
34047 use_vec_extr = true;
34051 use_vec_extr = TARGET_SSE4_1;
34063 tmp = gen_reg_rtx (mode);
34064 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
34065 GEN_INT (elt), GEN_INT (elt),
34066 GEN_INT (elt+4), GEN_INT (elt+4)));
34070 tmp = gen_reg_rtx (mode);
34071 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
34075 gcc_unreachable ();
34078 use_vec_extr = true;
34083 use_vec_extr = TARGET_SSE4_1;
34097 tmp = gen_reg_rtx (mode);
34098 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
34099 GEN_INT (elt), GEN_INT (elt),
34100 GEN_INT (elt), GEN_INT (elt)));
34104 tmp = gen_reg_rtx (mode);
34105 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
34109 gcc_unreachable ();
34112 use_vec_extr = true;
34117 /* For SSE1, we have to reuse the V4SF code. */
34118 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
34119 gen_lowpart (V4SFmode, vec), elt);
34125 use_vec_extr = TARGET_SSE2;
34128 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
34132 use_vec_extr = TARGET_SSE4_1;
34138 tmp = gen_reg_rtx (V4SFmode);
34140 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
34142 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
34143 ix86_expand_vector_extract (false, target, tmp, elt & 3);
34151 tmp = gen_reg_rtx (V2DFmode);
34153 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
34155 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
34156 ix86_expand_vector_extract (false, target, tmp, elt & 1);
34164 tmp = gen_reg_rtx (V16QImode);
34166 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
34168 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
34169 ix86_expand_vector_extract (false, target, tmp, elt & 15);
34177 tmp = gen_reg_rtx (V8HImode);
34179 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
34181 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
34182 ix86_expand_vector_extract (false, target, tmp, elt & 7);
34190 tmp = gen_reg_rtx (V4SImode);
34192 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
34194 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
34195 ix86_expand_vector_extract (false, target, tmp, elt & 3);
34203 tmp = gen_reg_rtx (V2DImode);
34205 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
34207 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
34208 ix86_expand_vector_extract (false, target, tmp, elt & 1);
34214 /* ??? Could extract the appropriate HImode element and shift. */
34221 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
34222 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
34224 /* Let the rtl optimizers know about the zero extension performed. */
34225 if (inner_mode == QImode || inner_mode == HImode)
34227 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
34228 target = gen_lowpart (SImode, target);
34231 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
34235 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
34237 emit_move_insn (mem, vec);
34239 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
34240 emit_move_insn (target, tmp);
34244 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
34245 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
34246 The upper bits of DEST are undefined, though they shouldn't cause
34247 exceptions (some bits from src or all zeros are ok). */
34250 emit_reduc_half (rtx dest, rtx src, int i)
34253 switch (GET_MODE (src))
34257 tem = gen_sse_movhlps (dest, src, src);
34259 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
34260 GEN_INT (1 + 4), GEN_INT (1 + 4));
34263 tem = gen_vec_interleave_highv2df (dest, src, src);
34269 tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
34270 gen_lowpart (V1TImode, src),
34275 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
34277 tem = gen_avx_shufps256 (dest, src, src,
34278 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
34282 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
34284 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
34291 tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
34292 gen_lowpart (V4DImode, src),
34293 gen_lowpart (V4DImode, src),
34296 tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
34297 gen_lowpart (V2TImode, src),
34301 gcc_unreachable ();
34306 /* Expand a vector reduction. FN is the binary pattern to reduce;
34307 DEST is the destination; IN is the input vector. */
34310 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
34312 rtx half, dst, vec = in;
34313 enum machine_mode mode = GET_MODE (in);
34316 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
34318 && mode == V8HImode
34319 && fn == gen_uminv8hi3)
34321 emit_insn (gen_sse4_1_phminposuw (dest, in));
34325 for (i = GET_MODE_BITSIZE (mode);
34326 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
34329 half = gen_reg_rtx (mode);
34330 emit_reduc_half (half, vec, i);
34331 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
34334 dst = gen_reg_rtx (mode);
34335 emit_insn (fn (dst, half, vec));
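/* Sketch of the halving reduction above for a 4-lane integer add
   (illustrative): each emit_reduc_half step folds the upper lanes onto
   the lower ones, so after log2(nelt) steps lane 0 holds the result:

     int reduce_add_v4 (const int v[4])
     {
       int a = v[0] + v[2];
       int b = v[1] + v[3];
       return a + b;
     }

   The same scheme works for any associative FN.  */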
34340 /* Target hook for scalar_mode_supported_p. */
34342 ix86_scalar_mode_supported_p (enum machine_mode mode)
34344 if (DECIMAL_FLOAT_MODE_P (mode))
34345 return default_decimal_float_supported_p ();
34346 else if (mode == TFmode)
34349 return default_scalar_mode_supported_p (mode);
34352 /* Implements target hook vector_mode_supported_p. */
34354 ix86_vector_mode_supported_p (enum machine_mode mode)
34356 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
34358 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
34360 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
34362 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
34364 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
34369 /* Target hook for c_mode_for_suffix. */
34370 static enum machine_mode
34371 ix86_c_mode_for_suffix (char suffix)
34381 /* Worker function for TARGET_MD_ASM_CLOBBERS.
34383 We do this in the new i386 backend to maintain source compatibility
34384 with the old cc0-based compiler. */
34387 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
34388 tree inputs ATTRIBUTE_UNUSED,
34391 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
34393 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
34398 /* Implements the target hook targetm.asm.encode_section_info.  */
34400 static void ATTRIBUTE_UNUSED
34401 ix86_encode_section_info (tree decl, rtx rtl, int first)
34403 default_encode_section_info (decl, rtl, first);
34405 if (TREE_CODE (decl) == VAR_DECL
34406 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
34407 && ix86_in_large_data_p (decl))
34408 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
34411 /* Worker function for REVERSE_CONDITION. */
34414 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
34416 return (mode != CCFPmode && mode != CCFPUmode
34417 ? reverse_condition (code)
34418 : reverse_condition_maybe_unordered (code));
34421 /* Output code to perform an x87 FP register move, from OPERANDS[1]
      to OPERANDS[0].  */
34425 output_387_reg_move (rtx insn, rtx *operands)
34427 if (REG_P (operands[0]))
34429 if (REG_P (operands[1])
34430 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
34432 if (REGNO (operands[0]) == FIRST_STACK_REG)
34433 return output_387_ffreep (operands, 0);
34434 return "fstp\t%y0";
34436 if (STACK_TOP_P (operands[0]))
34437 return "fld%Z1\t%y1";
34440 else if (MEM_P (operands[0]))
34442 gcc_assert (REG_P (operands[1]));
34443 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
34444 return "fstp%Z0\t%y0";
34447 /* There is no non-popping store to memory for XFmode.
34448 So if we need one, follow the store with a load. */
34449 if (GET_MODE (operands[0]) == XFmode)
34450 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
34452 return "fst%Z0\t%y0";
34459 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
34460    the FP status register is set.  */
34463 ix86_emit_fp_unordered_jump (rtx label)
34465 rtx reg = gen_reg_rtx (HImode);
34468 emit_insn (gen_x86_fnstsw_1 (reg));
34470 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
34472 emit_insn (gen_x86_sahf_1 (reg));
34474 temp = gen_rtx_REG (CCmode, FLAGS_REG);
34475 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
34479 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
34481 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
34482 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
34485 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
34486 gen_rtx_LABEL_REF (VOIDmode, label),
34488 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
34490 emit_jump_insn (temp);
34491 predict_jump (REG_BR_PROB_BASE * 10 / 100);
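/* Sketch of the status-word test above (illustrative): C2 is bit 10 of
   the x87 status word, so once fnstsw has stored the word, testing 0x04
   against the high byte isolates it:

     int c2_set (unsigned short fsw)
     {
       return (fsw >> 8) & 0x04;
     }

   The sahf path instead copies the condition bits into EFLAGS and
   branches on the unordered condition.  */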
34494 /* Output code to perform a log1p XFmode calculation. */
34496 void ix86_emit_i387_log1p (rtx op0, rtx op1)
34498 rtx label1 = gen_label_rtx ();
34499 rtx label2 = gen_label_rtx ();
34501 rtx tmp = gen_reg_rtx (XFmode);
34502 rtx tmp2 = gen_reg_rtx (XFmode);
34505 emit_insn (gen_absxf2 (tmp, op1));
34506 test = gen_rtx_GE (VOIDmode, tmp,
34507 CONST_DOUBLE_FROM_REAL_VALUE (
34508 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
34510 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
34512 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
34513 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
34514 emit_jump (label2);
34516 emit_label (label1);
34517 emit_move_insn (tmp, CONST1_RTX (XFmode));
34518 emit_insn (gen_addxf3 (tmp, op1, tmp));
34519 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
34520 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
34522 emit_label (label2);
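/* The 0.29289... threshold above is 1 - sqrt(2)/2: fyl2xp1 is only
   specified for arguments with |x| below that bound, so larger inputs
   take the fyl2x path on 1 + x instead.  Both arms multiply by the
   fldln2 constant to turn a base-2 log into a natural log;
   schematically:

     |x| <  1 - sqrt(2)/2:  log1p (x) = ln (2) * log2 (x + 1)   [fyl2xp1]
     |x| >= 1 - sqrt(2)/2:  log1p (x) = ln (2) * log2 (1 + x)   [fyl2x]  */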
34525 /* Emit x87 code to compute round (OP1), i.e. rounding half away from
      zero, storing the result in OP0.  */
34526 void ix86_emit_i387_round (rtx op0, rtx op1)
34528 enum machine_mode inmode = GET_MODE (op1);
34529 enum machine_mode outmode = GET_MODE (op0);
34530 rtx e1, e2, res, tmp, tmp1, half;
34531 rtx scratch = gen_reg_rtx (HImode);
34532 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
34533 rtx jump_label = gen_label_rtx ();
34535 rtx (*gen_abs) (rtx, rtx);
34536 rtx (*gen_neg) (rtx, rtx);
34541 gen_abs = gen_abssf2;
34544 gen_abs = gen_absdf2;
34547 gen_abs = gen_absxf2;
34550 gcc_unreachable ();
34556 gen_neg = gen_negsf2;
34559 gen_neg = gen_negdf2;
34562 gen_neg = gen_negxf2;
34565 gen_neg = gen_neghi2;
34568 gen_neg = gen_negsi2;
34571 gen_neg = gen_negdi2;
34574 gcc_unreachable ();
34577 e1 = gen_reg_rtx (inmode);
34578 e2 = gen_reg_rtx (inmode);
34579 res = gen_reg_rtx (outmode);
34581 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
34583 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
34585 /* scratch = fxam(op1) */
34586 emit_insn (gen_rtx_SET (VOIDmode, scratch,
34587 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
34589 /* e1 = fabs(op1) */
34590 emit_insn (gen_abs (e1, op1));
34592 /* e2 = e1 + 0.5 */
34593 half = force_reg (inmode, half);
34594 emit_insn (gen_rtx_SET (VOIDmode, e2,
34595 gen_rtx_PLUS (inmode, e1, half)));
34597 /* res = floor(e2) */
34598 if (inmode != XFmode)
34600 tmp1 = gen_reg_rtx (XFmode);
34602 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
34603 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
34613 rtx tmp0 = gen_reg_rtx (XFmode);
34615 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
34617 emit_insn (gen_rtx_SET (VOIDmode, res,
34618 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
34619 UNSPEC_TRUNC_NOOP)));
34623 emit_insn (gen_frndintxf2_floor (res, tmp1));
34626 emit_insn (gen_lfloorxfhi2 (res, tmp1));
34629 emit_insn (gen_lfloorxfsi2 (res, tmp1));
34632 emit_insn (gen_lfloorxfdi2 (res, tmp1));
34635 gcc_unreachable ();
34638 /* flags = signbit(a) */
34639 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
34641 /* if (flags) then res = -res */
34642 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
34643 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
34644 gen_rtx_LABEL_REF (VOIDmode, jump_label),
34646 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
34647 predict_jump (REG_BR_PROB_BASE * 50 / 100);
34648 JUMP_LABEL (insn) = jump_label;
34650 emit_insn (gen_neg (res, res));
34652 emit_label (jump_label);
34653 LABEL_NUSES (jump_label) = 1;
34655 emit_move_insn (op0, res);
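/* Plain-C sketch of the x87 sequence above (illustrative names):

     double round_sketch (double a)
     {
       double r = floor (fabs (a) + 0.5);
       return signbit (a) ? -r : r;
     }

   Stripping the sign first matters: floor (a + 0.5) would yield -2.0
   for a == -2.5, whereas round must produce -3.0.  */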
34658 /* Output code to perform a Newton-Raphson approximation of a single precision
34659    floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
34661 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
34663 rtx x0, x1, e0, e1;
34665 x0 = gen_reg_rtx (mode);
34666 e0 = gen_reg_rtx (mode);
34667 e1 = gen_reg_rtx (mode);
34668 x1 = gen_reg_rtx (mode);
34670 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))) */
34672 b = force_reg (mode, b);
34674 /* x0 = rcp(b) estimate */
34675 emit_insn (gen_rtx_SET (VOIDmode, x0,
34676 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
34679 emit_insn (gen_rtx_SET (VOIDmode, e0,
34680 gen_rtx_MULT (mode, x0, b)));
34683 emit_insn (gen_rtx_SET (VOIDmode, e0,
34684 gen_rtx_MULT (mode, x0, e0)));
34687 emit_insn (gen_rtx_SET (VOIDmode, e1,
34688 gen_rtx_PLUS (mode, x0, x0)));
34691 emit_insn (gen_rtx_SET (VOIDmode, x1,
34692 gen_rtx_MINUS (mode, e1, e0)));
34695 emit_insn (gen_rtx_SET (VOIDmode, res,
34696 gen_rtx_MULT (mode, a, x1)));
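/* One Newton-Raphson step refines a reciprocal estimate x0 toward 1/b
   as x1 = x0 * (2 - b * x0), computed above in the equivalent form
   (x0 + x0) - b * x0 * x0 before the final multiply by a.  A sketch,
   with rcp_estimate standing in for the hardware rcpps/rcpss estimate
   (hypothetical helper):

     float swdiv_sketch (float a, float b)
     {
       float x0 = rcp_estimate (b);
       float x1 = (x0 + x0) - (b * x0 * x0);
       return a * x1;
     }

   Each step roughly doubles the number of correct bits.  */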
34699 /* Output code to perform a Newton-Raphson approximation of a
34700    single precision floating point [reciprocal] square root.  */
34702 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
34705 rtx x0, e0, e1, e2, e3, mthree, mhalf;
34708 x0 = gen_reg_rtx (mode);
34709 e0 = gen_reg_rtx (mode);
34710 e1 = gen_reg_rtx (mode);
34711 e2 = gen_reg_rtx (mode);
34712 e3 = gen_reg_rtx (mode);
34714 real_from_integer (&r, VOIDmode, -3, -1, 0);
34715 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
34717 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
34718 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
34720 if (VECTOR_MODE_P (mode))
34722 mthree = ix86_build_const_vector (mode, true, mthree);
34723 mhalf = ix86_build_const_vector (mode, true, mhalf);
34726 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
34727 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
34729 a = force_reg (mode, a);
34731 /* x0 = rsqrt(a) estimate */
34732 emit_insn (gen_rtx_SET (VOIDmode, x0,
34733 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
34736 /* If a == 0.0, mask out the infinite rsqrt estimate so that
      sqrt (0.0) does not produce a NaN.  */
34741 zero = gen_reg_rtx (mode);
34742 mask = gen_reg_rtx (mode);
34744 zero = force_reg (mode, CONST0_RTX(mode));
34745 emit_insn (gen_rtx_SET (VOIDmode, mask,
34746 gen_rtx_NE (mode, zero, a)));
34748 emit_insn (gen_rtx_SET (VOIDmode, x0,
34749 gen_rtx_AND (mode, x0, mask)));
34753 emit_insn (gen_rtx_SET (VOIDmode, e0,
34754 gen_rtx_MULT (mode, x0, a)));
34756 emit_insn (gen_rtx_SET (VOIDmode, e1,
34757 gen_rtx_MULT (mode, e0, x0)));
34760 mthree = force_reg (mode, mthree);
34761 emit_insn (gen_rtx_SET (VOIDmode, e2,
34762 gen_rtx_PLUS (mode, e1, mthree)));
34764 mhalf = force_reg (mode, mhalf);
34766 /* e3 = -.5 * x0 */
34767 emit_insn (gen_rtx_SET (VOIDmode, e3,
34768 gen_rtx_MULT (mode, x0, mhalf)));
34770 /* e3 = -.5 * e0 */
34771 emit_insn (gen_rtx_SET (VOIDmode, e3,
34772 gen_rtx_MULT (mode, e0, mhalf)));
34773 /* ret = e2 * e3 */
34774 emit_insn (gen_rtx_SET (VOIDmode, res,
34775 gen_rtx_MULT (mode, e2, e3)));
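/* The Newton-Raphson step for 1/sqrt(a) is x1 = 0.5 * x0 *
   (3 - a * x0 * x0), rewritten above as -0.5 * x0 * (a * x0 * x0 - 3);
   multiplying by a first gives sqrt(a) instead of rsqrt(a).  A sketch,
   with rsqrt_estimate standing in for the rsqrtps/rsqrtss estimate
   (hypothetical helper):

     float swsqrt_sketch (float a, int recip)
     {
       float x0 = rsqrt_estimate (a);
       float e = a * x0 * x0 - 3.0f;
       return recip ? -0.5f * x0 * e : -0.5f * (a * x0) * e;
     }  */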
34778 #ifdef TARGET_SOLARIS
34779 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
34782 i386_solaris_elf_named_section (const char *name, unsigned int flags,
34785 /* With Binutils 2.15, the "@unwind" marker must be specified on
34786 every occurrence of the ".eh_frame" section, not just the first
      one.  */
34789 && strcmp (name, ".eh_frame") == 0)
34791 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
34792 flags & SECTION_WRITE ? "aw" : "a");
34797 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
34799 solaris_elf_asm_comdat_section (name, flags, decl);
34804 default_elf_asm_named_section (name, flags, decl);
34806 #endif /* TARGET_SOLARIS */
34808 /* Return the mangling of TYPE if it is an extended fundamental type. */
34810 static const char *
34811 ix86_mangle_type (const_tree type)
34813 type = TYPE_MAIN_VARIANT (type);
34815 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
34816 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
34819 switch (TYPE_MODE (type))
34822 /* __float128 is "g". */
34825 /* "long double" or __float80 is "e". */
34832 /* For 32-bit code we can save PIC register setup by using
34833 __stack_chk_fail_local hidden function instead of calling
34834 __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
34835 register, so it is better to call __stack_chk_fail directly. */
34837 static tree ATTRIBUTE_UNUSED
34838 ix86_stack_protect_fail (void)
34840 return TARGET_64BIT
34841 ? default_external_stack_protect_fail ()
34842 : default_hidden_stack_protect_fail ();
34845 /* Select a format to encode pointers in exception handling data. CODE
34846 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
34847 true if the symbol may be affected by dynamic relocations.
34849 ??? All x86 object file formats are capable of representing this.
34850 After all, the relocation needed is the same as for the call insn.
34851 Whether or not a particular assembler allows us to enter such, I
34852 guess we'll have to see. */
34854 asm_preferred_eh_data_format (int code, int global)
34858 int type = DW_EH_PE_sdata8;
34860 || ix86_cmodel == CM_SMALL_PIC
34861 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
34862 type = DW_EH_PE_sdata4;
34863 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
34865 if (ix86_cmodel == CM_SMALL
34866 || (ix86_cmodel == CM_MEDIUM && code))
34867 return DW_EH_PE_udata4;
34868 return DW_EH_PE_absptr;
34871 /* Expand copysign from SIGN to the positive value ABS_VALUE
34872 storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
      the sign bit.  */
34875 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
34877 enum machine_mode mode = GET_MODE (sign);
34878 rtx sgn = gen_reg_rtx (mode);
34879 if (mask == NULL_RTX)
34881 enum machine_mode vmode;
34883 if (mode == SFmode)
34885 else if (mode == DFmode)
34890 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
34891 if (!VECTOR_MODE_P (mode))
34893 /* We need to generate a scalar mode mask in this case. */
34894 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
34895 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
34896 mask = gen_reg_rtx (mode);
34897 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
34901 mask = gen_rtx_NOT (mode, mask);
34902 emit_insn (gen_rtx_SET (VOIDmode, sgn,
34903 gen_rtx_AND (mode, mask, sign)));
34904 emit_insn (gen_rtx_SET (VOIDmode, result,
34905 gen_rtx_IOR (mode, abs_value, sgn)));
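/* Bit-level sketch of the combine above for SFmode (illustrative):
   ABS_VALUE is assumed to have its sign bit clear, so OR-ing in the
   masked sign bit of SIGN implements copysign:

     unsigned int copysign_bits (unsigned int absv, unsigned int sign)
     {
       return absv | (sign & 0x80000000u);
     }  */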
34908 /* Expand fabs (OP0) and return a new rtx that holds the result. The
34909 mask for masking out the sign-bit is stored in *SMASK, if that is
34912 ix86_expand_sse_fabs (rtx op0, rtx *smask)
34914 enum machine_mode vmode, mode = GET_MODE (op0);
34917 xa = gen_reg_rtx (mode);
34918 if (mode == SFmode)
34920 else if (mode == DFmode)
34924 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
34925 if (!VECTOR_MODE_P (mode))
34927 /* We need to generate a scalar mode mask in this case. */
34928 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
34929 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
34930 mask = gen_reg_rtx (mode);
34931 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
34933 emit_insn (gen_rtx_SET (VOIDmode, xa,
34934 gen_rtx_AND (mode, op0, mask)));
34942 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
34943 swapping the operands if SWAP_OPERANDS is true. The expanded
34944 code is a forward jump to a newly created label in case the
34945 comparison is true. The generated label rtx is returned. */
34947 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
34948 bool swap_operands)
34959 label = gen_label_rtx ();
34960 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
34961 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34962 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
34963 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
34964 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
34965 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
34966 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
34967 JUMP_LABEL (tmp) = label;
34972 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
34973 using comparison code CODE. Operands are swapped for the comparison if
34974 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
34976 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
34977 bool swap_operands)
34979 rtx (*insn)(rtx, rtx, rtx, rtx);
34980 enum machine_mode mode = GET_MODE (op0);
34981 rtx mask = gen_reg_rtx (mode);
34990 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
34992 emit_insn (insn (mask, op0, op1,
34993 gen_rtx_fmt_ee (code, mode, op0, op1)));
34997 /* Generate and return an rtx of mode MODE for 2**n where n is the number
34998 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
35000 ix86_gen_TWO52 (enum machine_mode mode)
35002 REAL_VALUE_TYPE TWO52r;
35005 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
35006 TWO52 = const_double_from_real_value (TWO52r, mode);
35007 TWO52 = force_reg (mode, TWO52);
35012 /* Expand SSE sequence for computing lround from OP1 storing
      into OP0.  */
35015 ix86_expand_lround (rtx op0, rtx op1)
35017 /* C code for the stuff we're doing below:
35018 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
35021 enum machine_mode mode = GET_MODE (op1);
35022 const struct real_format *fmt;
35023 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35026 /* load nextafter (0.5, 0.0) */
35027 fmt = REAL_MODE_FORMAT (mode);
35028 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35029 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35031 /* adj = copysign (0.5, op1) */
35032 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
35033 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
35035 /* adj = op1 + adj */
35036 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
35038 /* op0 = (imode)adj */
35039 expand_fix (op0, adj, 0);
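/* The constant loaded above is nextafter (0.5, 0.0), i.e.
   0.5 - 2**(-p-1) for a p-bit mantissa.  Using it instead of exactly
   0.5 keeps inputs just below a halfway point, such as the float
   0.49999997f, from being rounded up, while true halfway cases still
   reach the next integer after the addition's own rounding.  Sketch
   for SFmode (0x1.fffffep-2f is that constant):

     long lround_sketch (float x)
     {
       return (long) (x + copysignf (0x1.fffffep-2f, x));
     }  */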
35042 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1,
      storing into OPERAND0.  */
35045 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
35047 /* C code for the stuff we're doing below (for do_floor):
35049 xi -= (double)xi > op1 ? 1 : 0;
35052 enum machine_mode fmode = GET_MODE (op1);
35053 enum machine_mode imode = GET_MODE (op0);
35054 rtx ireg, freg, label, tmp;
35056 /* reg = (long)op1 */
35057 ireg = gen_reg_rtx (imode);
35058 expand_fix (ireg, op1, 0);
35060 /* freg = (double)reg */
35061 freg = gen_reg_rtx (fmode);
35062 expand_float (freg, ireg, 0);
35064 /* ireg = (freg > op1) ? ireg - 1 : ireg */
35065 label = ix86_expand_sse_compare_and_jump (UNLE,
35066 freg, op1, !do_floor);
35067 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
35068 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
35069 emit_move_insn (ireg, tmp);
35071 emit_label (label);
35072 LABEL_NUSES (label) = 1;
35074 emit_move_insn (op0, ireg);
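/* Plain-C sketch of the floor variant above (illustrative):

     long lfloor_sketch (double x)
     {
       long xi = (long) x;
       if ((double) xi > x)
         xi -= 1;
       return xi;
     }

   The cast truncates toward zero, so only negative non-integers need
   the correction; the ceil variant swaps the comparison and adds 1.  */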
35077 /* Expand rint rounding OPERAND1 in the current rounding mode (round to
35078    nearest even by default) and storing the result in OPERAND0.  */
35080 ix86_expand_rint (rtx operand0, rtx operand1)
35082 /* C code for the stuff we're doing below:
35083 xa = fabs (operand1);
35084 if (!isless (xa, 2**52))
35086 xa = xa + 2**52 - 2**52;
35087 return copysign (xa, operand1);
35089 enum machine_mode mode = GET_MODE (operand0);
35090 rtx res, xa, label, TWO52, mask;
35092 res = gen_reg_rtx (mode);
35093 emit_move_insn (res, operand1);
35095 /* xa = abs (operand1) */
35096 xa = ix86_expand_sse_fabs (res, &mask);
35098 /* if (!isless (xa, TWO52)) goto label; */
35099 TWO52 = ix86_gen_TWO52 (mode);
35100 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35102 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35103 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
35105 ix86_sse_copysign_to_positive (res, xa, res, mask);
35107 emit_label (label);
35108 LABEL_NUSES (label) = 1;
35110 emit_move_insn (operand0, res);
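/* The TWO52 trick above relies on every double of magnitude at least
   2**52 already being an integer: adding and then subtracting 2**52
   rounds away the fraction in the current rounding mode.  Conceptual
   sketch (the real expansion emits both operations explicitly, so they
   cannot be folded away):

     double rint_sketch (double x)
     {
       double xa = fabs (x);
       if (!(xa < 0x1p52))
         return x;
       xa = (xa + 0x1p52) - 0x1p52;
       return copysign (xa, x);
     }  */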
35113 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
      into OPERAND0.  */
35116 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
35118 /* C code for the stuff we expand below.
35119 double xa = fabs (x), x2;
35120 if (!isless (xa, TWO52))
35122 xa = xa + TWO52 - TWO52;
35123 x2 = copysign (xa, x);
35132 enum machine_mode mode = GET_MODE (operand0);
35133 rtx xa, TWO52, tmp, label, one, res, mask;
35135 TWO52 = ix86_gen_TWO52 (mode);
35137 /* Temporary for holding the result, initialized to the input
35138 operand to ease control flow. */
35139 res = gen_reg_rtx (mode);
35140 emit_move_insn (res, operand1);
35142 /* xa = abs (operand1) */
35143 xa = ix86_expand_sse_fabs (res, &mask);
35145 /* if (!isless (xa, TWO52)) goto label; */
35146 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35148 /* xa = xa + TWO52 - TWO52; */
35149 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35150 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
35152 /* xa = copysign (xa, operand1) */
35153 ix86_sse_copysign_to_positive (xa, xa, res, mask);
35155 /* generate 1.0 or -1.0 */
35156 one = force_reg (mode,
35157 const_double_from_real_value (do_floor
35158 ? dconst1 : dconstm1, mode));
35160 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35161 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
35162 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35163 gen_rtx_AND (mode, one, tmp)));
35164 /* We always need to subtract here to preserve signed zero. */
35165 tmp = expand_simple_binop (mode, MINUS,
35166 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35167 emit_move_insn (res, tmp);
35169 emit_label (label);
35170 LABEL_NUSES (label) = 1;
35172 emit_move_insn (operand0, res);
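/* Sketch of the compensation above for the floor case (illustrative):
   rounding xa via TWO52 may land one too high, so 1.0 is subtracted
   exactly when the rounded value overshoots the input:

     x2 = copysign ((xa + 0x1p52) - 0x1p52, x);
     x2 -= (x2 > x) ? 1.0 : 0.0;

   For ceil the constant is -1.0 and the comparison is reversed, so the
   correction stays a subtraction, which keeps -0.0 intact.  */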
35175 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
      into OPERAND0.  */
35178 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
35180 /* C code for the stuff we expand below.
35181 double xa = fabs (x), x2;
35182 if (!isless (xa, TWO52))
35184 x2 = (double)(long)x;
35191 if (HONOR_SIGNED_ZEROS (mode))
35192 return copysign (x2, x);
35195 enum machine_mode mode = GET_MODE (operand0);
35196 rtx xa, xi, TWO52, tmp, label, one, res, mask;
35198 TWO52 = ix86_gen_TWO52 (mode);
35200 /* Temporary for holding the result, initialized to the input
35201 operand to ease control flow. */
35202 res = gen_reg_rtx (mode);
35203 emit_move_insn (res, operand1);
35205 /* xa = abs (operand1) */
35206 xa = ix86_expand_sse_fabs (res, &mask);
35208 /* if (!isless (xa, TWO52)) goto label; */
35209 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35211 /* xa = (double)(long)x */
35212 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35213 expand_fix (xi, res, 0);
35214 expand_float (xa, xi, 0);
35217 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
35219 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35220 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
35221 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35222 gen_rtx_AND (mode, one, tmp)));
35223 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
35224 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35225 emit_move_insn (res, tmp);
35227 if (HONOR_SIGNED_ZEROS (mode))
35228 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
35230 emit_label (label);
35231 LABEL_NUSES (label) = 1;
35233 emit_move_insn (operand0, res);
35236 /* Expand SSE sequence for computing round from OPERAND1 storing
35237 into OPERAND0.  The sequence works without relying on DImode truncation
35238 via cvttsd2siq, which is only available on 64-bit targets.  */
35240 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
35242 /* C code for the stuff we expand below.
35243 double xa = fabs (x), xa2, x2;
35244 if (!isless (xa, TWO52))
35246 Using the absolute value and copying back sign makes
35247 -0.0 -> -0.0 correct.
35248 xa2 = xa + TWO52 - TWO52;
35253 else if (dxa > 0.5)
35255 x2 = copysign (xa2, x);
35258 enum machine_mode mode = GET_MODE (operand0);
35259 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
35261 TWO52 = ix86_gen_TWO52 (mode);
35263 /* Temporary for holding the result, initialized to the input
35264 operand to ease control flow. */
35265 res = gen_reg_rtx (mode);
35266 emit_move_insn (res, operand1);
35268 /* xa = abs (operand1) */
35269 xa = ix86_expand_sse_fabs (res, &mask);
35271 /* if (!isless (xa, TWO52)) goto label; */
35272 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35274 /* xa2 = xa + TWO52 - TWO52; */
35275 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35276 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
35278 /* dxa = xa2 - xa; */
35279 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
35281 /* generate 0.5, 1.0 and -0.5 */
35282 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
35283 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
35284 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
35288 tmp = gen_reg_rtx (mode);
35289 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
35290 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
35291 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35292 gen_rtx_AND (mode, one, tmp)));
35293 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35294 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
35295 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
35296 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35297 gen_rtx_AND (mode, one, tmp)));
35298 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35300 /* res = copysign (xa2, operand1) */
35301 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
35303 emit_label (label);
35304 LABEL_NUSES (label) = 1;
35306 emit_move_insn (operand0, res);
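/* Sketch of the two corrections above (illustrative): xa2 is xa rounded
   by the TWO52 trick and dxa = xa2 - xa is the rounding error, so
   halfway cases are steered to round-half-away-from-zero:

     if (dxa > 0.5)
       xa2 -= 1.0;
     if (dxa <= -0.5)
       xa2 += 1.0;
     return copysign (xa2, x);

   E.g. xa = 2.5 rounds to xa2 = 2.0 under round-to-nearest-even,
   dxa = -0.5, and the second correction restores 3.0.  */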
35309 /* Expand SSE sequence for computing trunc from OPERAND1 storing
      into OPERAND0.  */
35312 ix86_expand_trunc (rtx operand0, rtx operand1)
35314 /* C code for SSE variant we expand below.
35315 double xa = fabs (x), x2;
35316 if (!isless (xa, TWO52))
35318 x2 = (double)(long)x;
35319 if (HONOR_SIGNED_ZEROS (mode))
35320 return copysign (x2, x);
35323 enum machine_mode mode = GET_MODE (operand0);
35324 rtx xa, xi, TWO52, label, res, mask;
35326 TWO52 = ix86_gen_TWO52 (mode);
35328 /* Temporary for holding the result, initialized to the input
35329 operand to ease control flow. */
35330 res = gen_reg_rtx (mode);
35331 emit_move_insn (res, operand1);
35333 /* xa = abs (operand1) */
35334 xa = ix86_expand_sse_fabs (res, &mask);
35336 /* if (!isless (xa, TWO52)) goto label; */
35337 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35339 /* x = (double)(long)x */
35340 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35341 expand_fix (xi, res, 0);
35342 expand_float (res, xi, 0);
35344 if (HONOR_SIGNED_ZEROS (mode))
35345 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
35347 emit_label (label);
35348 LABEL_NUSES (label) = 1;
35350 emit_move_insn (operand0, res);
35353 /* Expand SSE sequence for computing trunc from OPERAND1 storing
      into OPERAND0.  */
35356 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
35358 enum machine_mode mode = GET_MODE (operand0);
35359 rtx xa, mask, TWO52, label, one, res, smask, tmp;
35361 /* C code for SSE variant we expand below.
35362 double xa = fabs (x), x2;
35363 if (!isless (xa, TWO52))
35365 xa2 = xa + TWO52 - TWO52;
35369 x2 = copysign (xa2, x);
35373 TWO52 = ix86_gen_TWO52 (mode);
35375 /* Temporary for holding the result, initialized to the input
35376 operand to ease control flow. */
35377 res = gen_reg_rtx (mode);
35378 emit_move_insn (res, operand1);
35380 /* xa = abs (operand1) */
35381 xa = ix86_expand_sse_fabs (res, &smask);
35383 /* if (!isless (xa, TWO52)) goto label; */
35384 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35386 /* res = xa + TWO52 - TWO52; */
35387 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35388 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
35389 emit_move_insn (res, tmp);
35392 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
35394 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
35395 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
35396 emit_insn (gen_rtx_SET (VOIDmode, mask,
35397 gen_rtx_AND (mode, mask, one)));
35398 tmp = expand_simple_binop (mode, MINUS,
35399 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
35400 emit_move_insn (res, tmp);
35402 /* res = copysign (res, operand1) */
35403 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
35405 emit_label (label);
35406 LABEL_NUSES (label) = 1;
35408 emit_move_insn (operand0, res);
35411 /* Expand SSE sequence for computing round from OPERAND1 storing
      into OPERAND0.  */
35414 ix86_expand_round (rtx operand0, rtx operand1)
35416 /* C code for the stuff we're doing below:
35417 double xa = fabs (x);
35418 if (!isless (xa, TWO52))
35420 xa = (double)(long)(xa + nextafter (0.5, 0.0));
35421 return copysign (xa, x);
35423 enum machine_mode mode = GET_MODE (operand0);
35424 rtx res, TWO52, xa, label, xi, half, mask;
35425 const struct real_format *fmt;
35426 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35428 /* Temporary for holding the result, initialized to the input
35429 operand to ease control flow. */
35430 res = gen_reg_rtx (mode);
35431 emit_move_insn (res, operand1);
35433 TWO52 = ix86_gen_TWO52 (mode);
35434 xa = ix86_expand_sse_fabs (res, &mask);
35435 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35437 /* load nextafter (0.5, 0.0) */
35438 fmt = REAL_MODE_FORMAT (mode);
35439 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35440 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35442 /* xa = xa + 0.5 */
35443 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
35444 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
35446 /* xa = (double)(int64_t)xa */
35447 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35448 expand_fix (xi, xa, 0);
35449 expand_float (xa, xi, 0);
35451 /* res = copysign (xa, operand1) */
35452 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
35454 emit_label (label);
35455 LABEL_NUSES (label) = 1;
35457 emit_move_insn (operand0, res);
35460 /* Expand SSE sequence for computing round
35461 from OP1 storing into OP0 using the SSE4.1 round insn.  */
35463 ix86_expand_round_sse4 (rtx op0, rtx op1)
35465 enum machine_mode mode = GET_MODE (op0);
35466 rtx e1, e2, res, half;
35467 const struct real_format *fmt;
35468 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35469 rtx (*gen_copysign) (rtx, rtx, rtx);
35470 rtx (*gen_round) (rtx, rtx, rtx);
35475 gen_copysign = gen_copysignsf3;
35476 gen_round = gen_sse4_1_roundsf2;
35479 gen_copysign = gen_copysigndf3;
35480 gen_round = gen_sse4_1_rounddf2;
35483 gcc_unreachable ();
35486 /* round (a) = trunc (a + copysign (0.5, a)) */
35488 /* load nextafter (0.5, 0.0) */
35489 fmt = REAL_MODE_FORMAT (mode);
35490 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35491 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35492 half = const_double_from_real_value (pred_half, mode);
35494 /* e1 = copysign (0.5, op1) */
35495 e1 = gen_reg_rtx (mode);
35496 emit_insn (gen_copysign (e1, half, op1));
35498 /* e2 = op1 + e1 */
35499 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
35501 /* res = trunc (e2) */
35502 res = gen_reg_rtx (mode);
35503 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
35505 emit_move_insn (op0, res);
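/* Sketch of the identity above (illustrative; 0x1.fffffffffffffp-2 is
   nextafter (0.5, 0.0) for DFmode, the pred_half computed above):

     double round_sse4_sketch (double a)
     {
       return trunc (a + copysign (0x1.fffffffffffffp-2, a));
     }

   The slightly-under-half constant keeps values just below a halfway
   point from being rounded away from zero.  */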
35509 /* Table of valid machine attributes. */
35510 static const struct attribute_spec ix86_attribute_table[] =
35512 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
35513 affects_type_identity } */
35514 /* Stdcall attribute says callee is responsible for popping arguments
35515 if they are not variable. */
35516 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35518 /* Fastcall attribute says callee is responsible for popping arguments
35519 if they are not variable. */
35520 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35522 /* Thiscall attribute says callee is responsible for popping arguments
35523 if they are not variable. */
35524 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35526 /* Cdecl attribute says the callee is a normal C declaration */
35527 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35529 /* Regparm attribute specifies how many integer arguments are to be
35530 passed in registers. */
35531 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
35533 /* Sseregparm attribute says we are using x86_64 calling conventions
35534 for FP arguments. */
35535 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35537 /* The transactional memory builtins are implicitly regparm or fastcall
35538 depending on the ABI. Override the generic do-nothing attribute that
35539 these builtins were declared with. */
35540 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
35542 /* force_align_arg_pointer says this function realigns the stack at entry. */
35543 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
35544 false, true, true, ix86_handle_cconv_attribute, false },
35545 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35546 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
35547 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
35548 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
35551 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
35553 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
35555 #ifdef SUBTARGET_ATTRIBUTE_TABLE
35556 SUBTARGET_ATTRIBUTE_TABLE,
35558 /* ms_abi and sysv_abi calling convention function attributes. */
35559 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
35560 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
35561 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
35563 { "callee_pop_aggregate_return", 1, 1, false, true, true,
35564 ix86_handle_callee_pop_aggregate_return, true },
35566 { NULL, 0, 0, false, false, false, NULL, false }
35569 /* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
35572 tree vectype ATTRIBUTE_UNUSED,
35573 int misalign ATTRIBUTE_UNUSED)
  switch (type_of_cost)
    {
      case scalar_stmt:
        return ix86_cost->scalar_stmt_cost;
      case scalar_load:
        return ix86_cost->scalar_load_cost;
      case scalar_store:
        return ix86_cost->scalar_store_cost;
      case vector_stmt:
        return ix86_cost->vec_stmt_cost;
      case vector_load:
        return ix86_cost->vec_align_load_cost;
      case vector_store:
        return ix86_cost->vec_store_cost;
35595 case vec_to_scalar:
35596 return ix86_cost->vec_to_scalar_cost;
35598 case scalar_to_vec:
35599 return ix86_cost->scalar_to_vec_cost;
35601 case unaligned_load:
35602 case unaligned_store:
35603 return ix86_cost->vec_unalign_load_cost;
35605 case cond_branch_taken:
35606 return ix86_cost->cond_taken_branch_cost;
35608 case cond_branch_not_taken:
35609 return ix86_cost->cond_not_taken_branch_cost;
35612 case vec_promote_demote:
35613 return ix86_cost->vec_stmt_cost;
      default:
        gcc_unreachable ();
35620 /* Construct (set target (vec_select op0 (parallel perm))) and
35621 return true if that's a valid instruction in the active ISA. */
35624 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
35626 rtx rperm[MAX_VECT_LEN], x;
35629 for (i = 0; i < nelt; ++i)
35630 rperm[i] = GEN_INT (perm[i]);
35632 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
35633 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
35634 x = gen_rtx_SET (VOIDmode, target, x);
35637 if (recog_memoized (x) < 0)
35645 /* Similar, but generate a vec_concat from op0 and op1 as well. */
35648 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
35649 const unsigned char *perm, unsigned nelt)
35651 enum machine_mode v2mode;
35654 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
35655 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
35656 return expand_vselect (target, x, perm, nelt);
35659 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35660 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
35663 expand_vec_perm_blend (struct expand_vec_perm_d *d)
35665 enum machine_mode vmode = d->vmode;
35666 unsigned i, mask, nelt = d->nelt;
35667 rtx target, op0, op1, x;
35668 rtx rperm[32], vperm;
35670 if (d->op0 == d->op1)
35672 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
35674 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
35676 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
35681 /* This is a blend, not a permute. Elements must stay in their
35682 respective lanes. */
35683 for (i = 0; i < nelt; ++i)
35685 unsigned e = d->perm[i];
35686 if (!(e == i || e == i + nelt))
35693 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
35694 decision should be extracted elsewhere, so that we only try that
35695 sequence once all budget==3 options have been tried. */
35696 target = d->target;
35709 for (i = 0; i < nelt; ++i)
35710 mask |= (d->perm[i] >= nelt) << i;
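      /* Illustrative example (an editorial addition, not original
         source): for a V8HImode blend with d->perm ==
         { 0, 9, 2, 11, 4, 13, 6, 15 }, elements 1, 3, 5 and 7 come
         from op1 (index >= nelt), so mask == 0xaa and the vec_merge
         emitted at the end becomes pblendw with immediate 0xaa.  */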
35714 for (i = 0; i < 2; ++i)
35715 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
35720 for (i = 0; i < 4; ++i)
35721 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
35726 /* See if bytes move in pairs so we can use pblendw with
35727 an immediate argument, rather than pblendvb with a vector
35729 for (i = 0; i < 16; i += 2)
35730 if (d->perm[i] + 1 != d->perm[i + 1])
35733 for (i = 0; i < nelt; ++i)
35734 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
35737 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
35738 vperm = force_reg (vmode, vperm);
35740 if (GET_MODE_SIZE (vmode) == 16)
35741 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
35743 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
35747 for (i = 0; i < 8; ++i)
35748 mask |= (d->perm[i * 2] >= 16) << i;
35753 target = gen_lowpart (vmode, target);
35754 op0 = gen_lowpart (vmode, op0);
35755 op1 = gen_lowpart (vmode, op1);
35759 /* See if bytes move in pairs. If not, vpblendvb must be used. */
35760 for (i = 0; i < 32; i += 2)
35761 if (d->perm[i] + 1 != d->perm[i + 1])
35763 /* See if bytes move in quadruplets. If yes, vpblendd
35764 with immediate can be used. */
35765 for (i = 0; i < 32; i += 4)
35766 if (d->perm[i] + 2 != d->perm[i + 2])
35770 /* See if bytes move the same in both lanes. If yes,
35771 vpblendw with immediate can be used. */
35772 for (i = 0; i < 16; i += 2)
35773 if (d->perm[i] + 16 != d->perm[i + 16])
35776 /* Use vpblendw. */
35777 for (i = 0; i < 16; ++i)
35778 mask |= (d->perm[i * 2] >= 32) << i;
35783 /* Use vpblendd. */
35784 for (i = 0; i < 8; ++i)
35785 mask |= (d->perm[i * 4] >= 32) << i;
35790 /* See if words move in pairs. If yes, vpblendd can be used. */
35791 for (i = 0; i < 16; i += 2)
35792 if (d->perm[i] + 1 != d->perm[i + 1])
35796 /* See if words move the same in both lanes. If not,
35797 vpblendvb must be used. */
35798 for (i = 0; i < 8; i++)
35799 if (d->perm[i] + 8 != d->perm[i + 8])
35801 /* Use vpblendvb. */
35802 for (i = 0; i < 32; ++i)
35803 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
35807 target = gen_lowpart (vmode, target);
35808 op0 = gen_lowpart (vmode, op0);
35809 op1 = gen_lowpart (vmode, op1);
35810 goto finish_pblendvb;
35813 /* Use vpblendw. */
35814 for (i = 0; i < 16; ++i)
35815 mask |= (d->perm[i] >= 16) << i;
35819 /* Use vpblendd. */
35820 for (i = 0; i < 8; ++i)
35821 mask |= (d->perm[i * 2] >= 16) << i;
35826 /* Use vpblendd. */
35827 for (i = 0; i < 4; ++i)
35828 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
35833 gcc_unreachable ();
35836 /* This matches five different patterns with the different modes. */
35837 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
35838 x = gen_rtx_SET (VOIDmode, target, x);
35844 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35845 in terms of the variable form of vpermilps.
35847 Note that we will have already failed the immediate input vpermilps,
35848 which requires that the high and low part shuffle be identical; the
35849 variable form doesn't require that. */
35852 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
35854 rtx rperm[8], vperm;
35857 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
35860 /* We can only permute within the 128-bit lane. */
35861 for (i = 0; i < 8; ++i)
35863 unsigned e = d->perm[i];
35864 if (i < 4 ? e >= 4 : e < 4)
35871 for (i = 0; i < 8; ++i)
35873 unsigned e = d->perm[i];
35875 /* Within each 128-bit lane, the elements of op0 are numbered
35876 from 0 and the elements of op1 are numbered from 4. */
35882 rperm[i] = GEN_INT (e);
35885 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
35886 vperm = force_reg (V8SImode, vperm);
35887 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
35892 /* Return true if permutation D can be performed as VMODE permutation
35896 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
35898 unsigned int i, j, chunk;
35900 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
35901 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
35902 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
35905 if (GET_MODE_NUNITS (vmode) >= d->nelt)
  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
	if (d->perm[i] + j != d->perm[i + j])
	  return false;

  return true;
}
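/* Illustrative example (an editorial addition, not original source):
   the V16QImode permutation { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }
   moves bytes in aligned groups of four, so it is valid as the
   V4SImode permutation { 1, 0, 3, 2 }.  */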
35920 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35921 in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128. */
35924 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
35926 unsigned i, nelt, eltsz, mask;
35927 unsigned char perm[32];
35928 enum machine_mode vmode = V16QImode;
35929 rtx rperm[32], vperm, target, op0, op1;
35933 if (d->op0 != d->op1)
35935 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
35938 && valid_perm_using_mode_p (V2TImode, d))
35943 /* Use vperm2i128 insn. The pattern uses
35944 V4DImode instead of V2TImode. */
35945 target = gen_lowpart (V4DImode, d->target);
35946 op0 = gen_lowpart (V4DImode, d->op0);
35947 op1 = gen_lowpart (V4DImode, d->op1);
	  rperm[0]
	    = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
		       | ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
35951 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
35959 if (GET_MODE_SIZE (d->vmode) == 16)
35964 else if (GET_MODE_SIZE (d->vmode) == 32)
35969 /* V4DImode should be already handled through
35970 expand_vselect by vpermq instruction. */
35971 gcc_assert (d->vmode != V4DImode);
35974 if (d->vmode == V8SImode
35975 || d->vmode == V16HImode
35976 || d->vmode == V32QImode)
35978 /* First see if vpermq can be used for
35979 V8SImode/V16HImode/V32QImode. */
35980 if (valid_perm_using_mode_p (V4DImode, d))
35982 for (i = 0; i < 4; i++)
35983 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
35986 return expand_vselect (gen_lowpart (V4DImode, d->target),
35987 gen_lowpart (V4DImode, d->op0),
35991 /* Next see if vpermd can be used. */
35992 if (valid_perm_using_mode_p (V8SImode, d))
35996 if (vmode == V32QImode)
	      /* vpshufb only works intra-lane; it is not possible
		 to shuffle bytes across the lanes.  */
36000 for (i = 0; i < nelt; ++i)
36001 if ((d->perm[i] ^ i) & (nelt / 2))
36012 if (vmode == V8SImode)
36013 for (i = 0; i < 8; ++i)
36014 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
36017 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36018 if (d->op0 != d->op1)
36019 mask = 2 * nelt - 1;
36020 else if (vmode == V16QImode)
36023 mask = nelt / 2 - 1;
36025 for (i = 0; i < nelt; ++i)
36027 unsigned j, e = d->perm[i] & mask;
36028 for (j = 0; j < eltsz; ++j)
36029 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
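      /* Illustrative example (an editorial addition, not original
         source): with V4SImode elements (eltsz == 4), selecting
         element e == 2 expands into byte indices 8, 9, 10, 11 of the
         pshufb control vector.  */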
36033 vperm = gen_rtx_CONST_VECTOR (vmode,
36034 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
36035 vperm = force_reg (vmode, vperm);
36037 target = gen_lowpart (vmode, d->target);
36038 op0 = gen_lowpart (vmode, d->op0);
36039 if (d->op0 == d->op1)
36041 if (vmode == V16QImode)
36042 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
36043 else if (vmode == V32QImode)
36044 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
36046 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
36050 op1 = gen_lowpart (vmode, d->op1);
36051 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
36057 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
36058 in a single instruction. */
36061 expand_vec_perm_1 (struct expand_vec_perm_d *d)
36063 unsigned i, nelt = d->nelt;
36064 unsigned char perm2[MAX_VECT_LEN];
36066 /* Check plain VEC_SELECT first, because AVX has instructions that could
36067 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
36068 input where SEL+CONCAT may not. */
36069 if (d->op0 == d->op1)
36071 int mask = nelt - 1;
36072 bool identity_perm = true;
36073 bool broadcast_perm = true;
36075 for (i = 0; i < nelt; i++)
36077 perm2[i] = d->perm[i] & mask;
36079 identity_perm = false;
36081 broadcast_perm = false;
36087 emit_move_insn (d->target, d->op0);
36090 else if (broadcast_perm && TARGET_AVX2)
36092 /* Use vpbroadcast{b,w,d}. */
36093 rtx op = d->op0, (*gen) (rtx, rtx) = NULL;
36097 op = gen_lowpart (V16QImode, op);
36098 gen = gen_avx2_pbroadcastv32qi;
36101 op = gen_lowpart (V8HImode, op);
36102 gen = gen_avx2_pbroadcastv16hi;
36105 op = gen_lowpart (V4SImode, op);
36106 gen = gen_avx2_pbroadcastv8si;
36109 gen = gen_avx2_pbroadcastv16qi;
36112 gen = gen_avx2_pbroadcastv8hi;
	  /* For other modes, prefer the other shuffles this function
	     creates.  */
36120 emit_insn (gen (d->target, op));
36125 if (expand_vselect (d->target, d->op0, perm2, nelt))
36128 /* There are plenty of patterns in sse.md that are written for
36129 SEL+CONCAT and are not replicated for a single op. Perhaps
36130 that should be changed, to avoid the nastiness here. */
36132 /* Recognize interleave style patterns, which means incrementing
36133 every other permutation operand. */
36134 for (i = 0; i < nelt; i += 2)
36136 perm2[i] = d->perm[i] & mask;
36137 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
36139 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
36142 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
36145 for (i = 0; i < nelt; i += 4)
36147 perm2[i + 0] = d->perm[i + 0] & mask;
36148 perm2[i + 1] = d->perm[i + 1] & mask;
36149 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
36150 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
36153 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
36158 /* Finally, try the fully general two operand permute. */
36159 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
36162 /* Recognize interleave style patterns with reversed operands. */
36163 if (d->op0 != d->op1)
36165 for (i = 0; i < nelt; ++i)
36167 unsigned e = d->perm[i];
36175 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
36179 /* Try the SSE4.1 blend variable merge instructions. */
36180 if (expand_vec_perm_blend (d))
36183 /* Try one of the AVX vpermil variable permutations. */
36184 if (expand_vec_perm_vpermil (d))
36187 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
36188 vpshufb, vpermd or vpermq variable permutation. */
36189 if (expand_vec_perm_pshufb (d))
36195 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36196 in terms of a pair of pshuflw + pshufhw instructions. */
36199 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
36201 unsigned char perm2[MAX_VECT_LEN];
36205 if (d->vmode != V8HImode || d->op0 != d->op1)
36208 /* The two permutations only operate in 64-bit lanes. */
36209 for (i = 0; i < 4; ++i)
36210 if (d->perm[i] >= 4)
36212 for (i = 4; i < 8; ++i)
36213 if (d->perm[i] < 4)
36219 /* Emit the pshuflw. */
36220 memcpy (perm2, d->perm, 4);
36221 for (i = 4; i < 8; ++i)
36223 ok = expand_vselect (d->target, d->op0, perm2, 8);
36226 /* Emit the pshufhw. */
36227 memcpy (perm2 + 4, d->perm + 4, 4);
36228 for (i = 0; i < 4; ++i)
36230 ok = expand_vselect (d->target, d->target, perm2, 8);
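  /* Illustrative example (an editorial addition, not original source):
     the V8HImode permutation { 3, 1, 2, 0, 7, 5, 6, 4 } keeps the low
     and high four elements within their own 64-bit halves, so it is
     emitted as pshuflw reordering the low half followed by pshufhw
     reordering the high half.  */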
36236 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36237 the permutation using the SSSE3 palignr instruction. This succeeds
36238 when all of the elements in PERM fit within one vector and we merely
36239 need to shift them down so that a single vector permutation has a
36240 chance to succeed. */
36243 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
36245 unsigned i, nelt = d->nelt;
36250 /* Even with AVX, palignr only operates on 128-bit vectors. */
36251 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
36254 min = nelt, max = 0;
36255 for (i = 0; i < nelt; ++i)
36257 unsigned e = d->perm[i];
36263 if (min == 0 || max - min >= nelt)
36266 /* Given that we have SSSE3, we know we'll be able to implement the
36267 single operand permutation after the palignr with pshufb. */
36271 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
36272 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
36273 gen_lowpart (TImode, d->op1),
36274 gen_lowpart (TImode, d->op0), shift));
36276 d->op0 = d->op1 = d->target;
36279 for (i = 0; i < nelt; ++i)
36281 unsigned e = d->perm[i] - min;
36287 /* Test for the degenerate case where the alignment by itself
36288 produces the desired permutation. */
36292 ok = expand_vec_perm_1 (d);
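  /* Illustrative example (an editorial addition, not original source):
     for V8HImode with d->perm == { 3, 4, 5, 6, 7, 8, 9, 10 }, min == 3,
     so palignr shifts the op1:op0 pair down by 3 * 16 bits; the
     residual permutation is then the identity { 0, ..., 7 }, the
     degenerate in-order case where the alignment alone produces the
     result.  */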
36298 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
36300 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36301 a two vector permutation into a single vector permutation by using
36302 an interleave operation to merge the vectors. */
36305 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
36307 struct expand_vec_perm_d dremap, dfinal;
36308 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
36309 unsigned HOST_WIDE_INT contents;
36310 unsigned char remap[2 * MAX_VECT_LEN];
36312 bool ok, same_halves = false;
36314 if (GET_MODE_SIZE (d->vmode) == 16)
36316 if (d->op0 == d->op1)
36319 else if (GET_MODE_SIZE (d->vmode) == 32)
36323 /* For 32-byte modes allow even d->op0 == d->op1.
36324 The lack of cross-lane shuffling in some instructions
36325 might prevent a single insn shuffle. */
36327 dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
	 a 3 insn sequence, give up and let it be expanded as
	 a 3 insn sequence.  While that is one insn longer,
	 it doesn't need a memory operand, and in the common
	 case that the interleave low and interleave high
	 permutations with the same operands are adjacent, the
	 two sequences need only 4 insns in total after CSE.  */
36335 if (expand_vec_perm_interleave3 (&dfinal))
36341 /* Examine from whence the elements come. */
36343 for (i = 0; i < nelt; ++i)
36344 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
36346 memset (remap, 0xff, sizeof (remap));
36349 if (GET_MODE_SIZE (d->vmode) == 16)
36351 unsigned HOST_WIDE_INT h1, h2, h3, h4;
36353 /* Split the two input vectors into 4 halves. */
36354 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
  /* If the elements are all from the low halves, use interleave low;
     similarly for interleave high.  If the elements are from
     mismatched halves, we can use shufps for V4SF/V4SI or do a DImode
     shuffle.  */
36362 if ((contents & (h1 | h3)) == contents)
36365 for (i = 0; i < nelt2; ++i)
36368 remap[i + nelt] = i * 2 + 1;
36369 dremap.perm[i * 2] = i;
36370 dremap.perm[i * 2 + 1] = i + nelt;
36372 if (!TARGET_SSE2 && d->vmode == V4SImode)
36373 dremap.vmode = V4SFmode;
36375 else if ((contents & (h2 | h4)) == contents)
36378 for (i = 0; i < nelt2; ++i)
36380 remap[i + nelt2] = i * 2;
36381 remap[i + nelt + nelt2] = i * 2 + 1;
36382 dremap.perm[i * 2] = i + nelt2;
36383 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
36385 if (!TARGET_SSE2 && d->vmode == V4SImode)
36386 dremap.vmode = V4SFmode;
36388 else if ((contents & (h1 | h4)) == contents)
36391 for (i = 0; i < nelt2; ++i)
36394 remap[i + nelt + nelt2] = i + nelt2;
36395 dremap.perm[i] = i;
36396 dremap.perm[i + nelt2] = i + nelt + nelt2;
36401 dremap.vmode = V2DImode;
36403 dremap.perm[0] = 0;
36404 dremap.perm[1] = 3;
36407 else if ((contents & (h2 | h3)) == contents)
36410 for (i = 0; i < nelt2; ++i)
36412 remap[i + nelt2] = i;
36413 remap[i + nelt] = i + nelt2;
36414 dremap.perm[i] = i + nelt2;
36415 dremap.perm[i + nelt2] = i + nelt;
36420 dremap.vmode = V2DImode;
36422 dremap.perm[0] = 1;
36423 dremap.perm[1] = 2;
36431 unsigned int nelt4 = nelt / 4, nzcnt = 0;
36432 unsigned HOST_WIDE_INT q[8];
36433 unsigned int nonzero_halves[4];
36435 /* Split the two input vectors into 8 quarters. */
36436 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
36437 for (i = 1; i < 8; ++i)
36438 q[i] = q[0] << (nelt4 * i);
36439 for (i = 0; i < 4; ++i)
36440 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
36442 nonzero_halves[nzcnt] = i;
36448 gcc_assert (d->op0 == d->op1);
36449 nonzero_halves[1] = nonzero_halves[0];
36450 same_halves = true;
36452 else if (d->op0 == d->op1)
36454 gcc_assert (nonzero_halves[0] == 0);
36455 gcc_assert (nonzero_halves[1] == 1);
36460 if (d->perm[0] / nelt2 == nonzero_halves[1])
	      /* Attempt to increase the likelihood that dfinal
		 shuffle will be intra-lane.  */
36464 char tmph = nonzero_halves[0];
36465 nonzero_halves[0] = nonzero_halves[1];
36466 nonzero_halves[1] = tmph;
36469 /* vperm2f128 or vperm2i128. */
36470 for (i = 0; i < nelt2; ++i)
36472 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
36473 remap[i + nonzero_halves[0] * nelt2] = i;
36474 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
36475 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
36478 if (d->vmode != V8SFmode
36479 && d->vmode != V4DFmode
36480 && d->vmode != V8SImode)
36482 dremap.vmode = V8SImode;
36484 for (i = 0; i < 4; ++i)
36486 dremap.perm[i] = i + nonzero_halves[0] * 4;
36487 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
36491 else if (d->op0 == d->op1)
36493 else if (TARGET_AVX2
36494 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
36497 for (i = 0; i < nelt4; ++i)
36500 remap[i + nelt] = i * 2 + 1;
36501 remap[i + nelt2] = i * 2 + nelt2;
36502 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
36503 dremap.perm[i * 2] = i;
36504 dremap.perm[i * 2 + 1] = i + nelt;
36505 dremap.perm[i * 2 + nelt2] = i + nelt2;
36506 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
36509 else if (TARGET_AVX2
36510 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
36513 for (i = 0; i < nelt4; ++i)
36515 remap[i + nelt4] = i * 2;
36516 remap[i + nelt + nelt4] = i * 2 + 1;
36517 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
36518 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
36519 dremap.perm[i * 2] = i + nelt4;
36520 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
36521 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
36522 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
36529 /* Use the remapping array set up above to move the elements from their
36530 swizzled locations into their final destinations. */
36532 for (i = 0; i < nelt; ++i)
36534 unsigned e = remap[d->perm[i]];
36535 gcc_assert (e < nelt);
36536 /* If same_halves is true, both halves of the remapped vector are the
36537 same. Avoid cross-lane accesses if possible. */
36538 if (same_halves && i >= nelt2)
36540 gcc_assert (e < nelt2);
36541 dfinal.perm[i] = e + nelt2;
36544 dfinal.perm[i] = e;
36546 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
36547 dfinal.op1 = dfinal.op0;
36548 dremap.target = dfinal.op0;
36550 /* Test if the final remap can be done with a single insn. For V4SFmode or
36551 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
36553 ok = expand_vec_perm_1 (&dfinal);
36554 seq = get_insns ();
36563 if (dremap.vmode != dfinal.vmode)
36565 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
36566 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
36567 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
36570 ok = expand_vec_perm_1 (&dremap);
36577 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36578 a single vector cross-lane permutation into vpermq followed
36579 by any of the single insn permutations. */
36582 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
36584 struct expand_vec_perm_d dremap, dfinal;
36585 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
36586 unsigned contents[2];
36590 && (d->vmode == V32QImode || d->vmode == V16HImode)
36591 && d->op0 == d->op1))
36596 for (i = 0; i < nelt2; ++i)
36598 contents[0] |= 1u << (d->perm[i] / nelt4);
36599 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
36602 for (i = 0; i < 2; ++i)
36604 unsigned int cnt = 0;
36605 for (j = 0; j < 4; ++j)
36606 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
36614 dremap.vmode = V4DImode;
36616 dremap.target = gen_reg_rtx (V4DImode);
36617 dremap.op0 = gen_lowpart (V4DImode, d->op0);
36618 dremap.op1 = dremap.op0;
36619 for (i = 0; i < 2; ++i)
36621 unsigned int cnt = 0;
36622 for (j = 0; j < 4; ++j)
36623 if ((contents[i] & (1u << j)) != 0)
36624 dremap.perm[2 * i + cnt++] = j;
36625 for (; cnt < 2; ++cnt)
36626 dremap.perm[2 * i + cnt] = 0;
36630 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
36631 dfinal.op1 = dfinal.op0;
36632 for (i = 0, j = 0; i < nelt; ++i)
36636 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
36637 if ((d->perm[i] / nelt4) == dremap.perm[j])
36639 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
36640 dfinal.perm[i] |= nelt4;
36642 gcc_unreachable ();
36645 ok = expand_vec_perm_1 (&dremap);
36648 ok = expand_vec_perm_1 (&dfinal);
36654 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36655 a two vector permutation using 2 intra-lane interleave insns
36656 and cross-lane shuffle for 32-byte vectors. */
36659 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
36662 rtx (*gen) (rtx, rtx, rtx);
36664 if (d->op0 == d->op1)
36666 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
36668 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
36674 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
36676 for (i = 0; i < nelt; i += 2)
36677 if (d->perm[i] != d->perm[0] + i / 2
36678 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
36688 gen = gen_vec_interleave_highv32qi;
36690 gen = gen_vec_interleave_lowv32qi;
36694 gen = gen_vec_interleave_highv16hi;
36696 gen = gen_vec_interleave_lowv16hi;
36700 gen = gen_vec_interleave_highv8si;
36702 gen = gen_vec_interleave_lowv8si;
36706 gen = gen_vec_interleave_highv4di;
36708 gen = gen_vec_interleave_lowv4di;
36712 gen = gen_vec_interleave_highv8sf;
36714 gen = gen_vec_interleave_lowv8sf;
36718 gen = gen_vec_interleave_highv4df;
36720 gen = gen_vec_interleave_lowv4df;
36723 gcc_unreachable ();
36726 emit_insn (gen (d->target, d->op0, d->op1));
36730 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
36731 permutation with two pshufb insns and an ior. We should have already
36732 failed all two instruction sequences. */
36735 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
36737 rtx rperm[2][16], vperm, l, h, op, m128;
36738 unsigned int i, nelt, eltsz;
36740 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
36742 gcc_assert (d->op0 != d->op1);
36745 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36747 /* Generate two permutation masks. If the required element is within
36748 the given vector it is shuffled into the proper lane. If the required
36749 element is in the other vector, force a zero into the lane by setting
36750 bit 7 in the permutation mask. */
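  /* Illustrative example (an editorial addition, not original source):
     for a V16QImode extract-even (perm == { 0, 2, ..., 30 }), the
     first mask is { 0, 2, ..., 14, -128 x8 }, filling the low result
     bytes from op0, and the second is { -128 x8, 0, 2, ..., 14 },
     filling the high result bytes from op1; the final por merges the
     two pshufb results.  */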
36751 m128 = GEN_INT (-128);
36752 for (i = 0; i < nelt; ++i)
36754 unsigned j, e = d->perm[i];
36755 unsigned which = (e >= nelt);
36759 for (j = 0; j < eltsz; ++j)
36761 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
36762 rperm[1-which][i*eltsz + j] = m128;
36766 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
36767 vperm = force_reg (V16QImode, vperm);
36769 l = gen_reg_rtx (V16QImode);
36770 op = gen_lowpart (V16QImode, d->op0);
36771 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
36773 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
36774 vperm = force_reg (V16QImode, vperm);
36776 h = gen_reg_rtx (V16QImode);
36777 op = gen_lowpart (V16QImode, d->op1);
36778 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
36780 op = gen_lowpart (V16QImode, d->target);
36781 emit_insn (gen_iorv16qi3 (op, l, h));
/* Implement arbitrary permutation of one V32QImode or V16HImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */
36791 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
36793 rtx rperm[2][32], vperm, l, h, hp, op, m128;
36794 unsigned int i, nelt, eltsz;
36797 || d->op0 != d->op1
36798 || (d->vmode != V32QImode && d->vmode != V16HImode))
36805 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from
     the other lane, force a zero by setting bit 7 in the permutation
     mask.  The second mask has a non-negative entry wherever the
     element is requested from the other lane; that entry is also moved
     to the other lane, so that the result of vpshufb has its two
     V2TImode halves swapped.  */
36814 m128 = GEN_INT (-128);
36815 for (i = 0; i < nelt; ++i)
36817 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
36818 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
36820 for (j = 0; j < eltsz; ++j)
36822 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
36823 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
36827 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
36828 vperm = force_reg (V32QImode, vperm);
36830 h = gen_reg_rtx (V32QImode);
36831 op = gen_lowpart (V32QImode, d->op0);
36832 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
  /* Swap the 128-bit lanes of h into hp.  */
36835 hp = gen_reg_rtx (V4DImode);
36836 op = gen_lowpart (V4DImode, h);
36837 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
36840 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
36841 vperm = force_reg (V32QImode, vperm);
36843 l = gen_reg_rtx (V32QImode);
36844 op = gen_lowpart (V32QImode, d->op0);
36845 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
36847 op = gen_lowpart (V32QImode, d->target);
36848 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode or V16HImode operands
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */
36859 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
36861 rtx rperm[2][32], vperm, l, h, ior, op, m128;
36862 unsigned int i, nelt, eltsz;
36865 || d->op0 == d->op1
36866 || (d->vmode != V32QImode && d->vmode != V16HImode))
36869 for (i = 0; i < d->nelt; ++i)
36870 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
36877 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36879 /* Generate two permutation masks. In the first permutation mask
36880 the first quarter will contain indexes for the first half
36881 of the op0, the second quarter will contain bit 7 set, third quarter
36882 will contain indexes for the second half of the op0 and the
36883 last quarter bit 7 set. In the second permutation mask
36884 the first quarter will contain bit 7 set, the second quarter
36885 indexes for the first half of the op1, the third quarter bit 7 set
36886 and last quarter indexes for the second half of the op1.
36887 I.e. the first mask e.g. for V32QImode extract even will be:
36888 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
36889 (all values masked with 0xf except for -128) and second mask
36890 for extract even will be
36891 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
36892 m128 = GEN_INT (-128);
36893 for (i = 0; i < nelt; ++i)
36895 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
36896 unsigned which = d->perm[i] >= nelt;
36897 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
36899 for (j = 0; j < eltsz; ++j)
36901 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
36902 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
36906 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
36907 vperm = force_reg (V32QImode, vperm);
36909 l = gen_reg_rtx (V32QImode);
36910 op = gen_lowpart (V32QImode, d->op0);
36911 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
36913 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
36914 vperm = force_reg (V32QImode, vperm);
36916 h = gen_reg_rtx (V32QImode);
36917 op = gen_lowpart (V32QImode, d->op1);
36918 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
36920 ior = gen_reg_rtx (V32QImode);
36921 emit_insn (gen_iorv32qi3 (ior, l, h));
36923 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
36924 op = gen_lowpart (V4DImode, d->target);
36925 ior = gen_lowpart (V4DImode, ior);
36926 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
36927 const1_rtx, GEN_INT (3)));
36932 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
36933 and extract-odd permutations. */
36936 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
36943 t1 = gen_reg_rtx (V4DFmode);
36944 t2 = gen_reg_rtx (V4DFmode);
36946 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
36947 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
36948 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
36950 /* Now an unpck[lh]pd will produce the result required. */
36952 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
36954 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
36960 int mask = odd ? 0xdd : 0x88;
36962 t1 = gen_reg_rtx (V8SFmode);
36963 t2 = gen_reg_rtx (V8SFmode);
36964 t3 = gen_reg_rtx (V8SFmode);
36966 /* Shuffle within the 128-bit lanes to produce:
36967 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
36968 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
36971 /* Shuffle the lanes around to produce:
36972 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
36973 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
36976 /* Shuffle within the 128-bit lanes to produce:
36977 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
36978 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
36980 /* Shuffle within the 128-bit lanes to produce:
36981 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
36982 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
36984 /* Shuffle the lanes around to produce:
36985 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
36986 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
36995 /* These are always directly implementable by expand_vec_perm_1. */
36996 gcc_unreachable ();
37000 return expand_vec_perm_pshufb2 (d);
37003 /* We need 2*log2(N)-1 operations to achieve odd/even
37004 with interleave. */
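      /* Illustrative note (an editorial addition, not original source):
	 for V8HImode, N == 8, so the five interleave insns emitted
	 below suffice; the V16QImode case further down needs
	 2*log2(16)-1 == 7 interleaves.  */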
37005 t1 = gen_reg_rtx (V8HImode);
37006 t2 = gen_reg_rtx (V8HImode);
37007 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
37008 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
37009 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
37010 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
37012 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
37014 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
37021 return expand_vec_perm_pshufb2 (d);
37024 t1 = gen_reg_rtx (V16QImode);
37025 t2 = gen_reg_rtx (V16QImode);
37026 t3 = gen_reg_rtx (V16QImode);
37027 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
37028 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
37029 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
37030 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
37031 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
37032 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
37034 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
37036 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
37043 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
37048 struct expand_vec_perm_d d_copy = *d;
37049 d_copy.vmode = V4DFmode;
37050 d_copy.target = gen_lowpart (V4DFmode, d->target);
37051 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
37052 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
37053 return expand_vec_perm_even_odd_1 (&d_copy, odd);
37056 t1 = gen_reg_rtx (V4DImode);
37057 t2 = gen_reg_rtx (V4DImode);
37059 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
37060 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
37061 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
37063 /* Now an vpunpck[lh]qdq will produce the result required. */
37065 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
37067 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
37074 struct expand_vec_perm_d d_copy = *d;
37075 d_copy.vmode = V8SFmode;
37076 d_copy.target = gen_lowpart (V8SFmode, d->target);
37077 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
37078 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
37079 return expand_vec_perm_even_odd_1 (&d_copy, odd);
37082 t1 = gen_reg_rtx (V8SImode);
37083 t2 = gen_reg_rtx (V8SImode);
37085 /* Shuffle the lanes around into
37086 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
37087 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
37088 gen_lowpart (V4DImode, d->op0),
37089 gen_lowpart (V4DImode, d->op1),
37091 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
37092 gen_lowpart (V4DImode, d->op0),
37093 gen_lowpart (V4DImode, d->op1),
37096 /* Swap the 2nd and 3rd position in each lane into
37097 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
37098 emit_insn (gen_avx2_pshufdv3 (t1, t1,
37099 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
37100 emit_insn (gen_avx2_pshufdv3 (t2, t2,
37101 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
37103 /* Now an vpunpck[lh]qdq will produce
37104 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
37106 t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
37107 gen_lowpart (V4DImode, t1),
37108 gen_lowpart (V4DImode, t2));
37110 t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
37111 gen_lowpart (V4DImode, t1),
37112 gen_lowpart (V4DImode, t2));
37117 gcc_unreachable ();
37123 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
37124 extract-even and extract-odd permutations. */
37127 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
37129 unsigned i, odd, nelt = d->nelt;
37132 if (odd != 0 && odd != 1)
37135 for (i = 1; i < nelt; ++i)
37136 if (d->perm[i] != 2 * i + odd)
37139 return expand_vec_perm_even_odd_1 (d, odd);
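/* Illustrative example (an editorial addition, not original source):
   for V4SImode, the two-operand extract-odd permutation is
   { 1, 3, 5, 7 }: d->perm[0] == 1 selects odd, and each d->perm[i]
   equals 2*i + 1 as the loop above requires.  */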
37142 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
37143 permutations. We assume that expand_vec_perm_1 has already failed. */
37146 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
37148 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
37149 enum machine_mode vmode = d->vmode;
37150 unsigned char perm2[4];
37158 /* These are special-cased in sse.md so that we can optionally
37159 use the vbroadcast instruction. They expand to two insns
37160 if the input happens to be in a register. */
37161 gcc_unreachable ();
37167 /* These are always implementable using standard shuffle patterns. */
37168 gcc_unreachable ();
37172 /* These can be implemented via interleave. We save one insn by
37173 stopping once we have promoted to V4SImode and then use pshufd. */
37177 rtx (*gen) (rtx, rtx, rtx)
37178 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
37179 : gen_vec_interleave_lowv8hi;
37183 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
37184 : gen_vec_interleave_highv8hi;
37189 dest = gen_reg_rtx (vmode);
37190 emit_insn (gen (dest, op0, op0));
37191 vmode = get_mode_wider_vector (vmode);
37192 op0 = gen_lowpart (vmode, dest);
37194 while (vmode != V4SImode);
37196 memset (perm2, elt, 4);
37197 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
37205 /* For AVX2 broadcasts of the first element vpbroadcast* or
37206 vpermq should be used by expand_vec_perm_1. */
37207 gcc_assert (!TARGET_AVX2 || d->perm[0]);
37211 gcc_unreachable ();
37215 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
37216 broadcast permutations. */
37219 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
37221 unsigned i, elt, nelt = d->nelt;
37223 if (d->op0 != d->op1)
37227 for (i = 1; i < nelt; ++i)
37228 if (d->perm[i] != elt)
37231 return expand_vec_perm_broadcast_1 (d);
/* Implement arbitrary permutation of two V32QImode or V16HImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already
   failed all the shorter instruction sequences.  */
37239 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
37241 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
37242 unsigned int i, nelt, eltsz;
37246 || d->op0 == d->op1
37247 || (d->vmode != V32QImode && d->vmode != V16HImode))
37254 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from
     the other lane, force a zero by setting bit 7 in the permutation
     mask.  The cross-lane masks have a non-negative entry wherever the
     element is requested from the other lane; that entry is also moved
     to the other lane, so that the result of vpshufb has its two
     V2TImode halves swapped.  */
37263 m128 = GEN_INT (-128);
37264 for (i = 0; i < 32; ++i)
37266 rperm[0][i] = m128;
37267 rperm[1][i] = m128;
37268 rperm[2][i] = m128;
37269 rperm[3][i] = m128;
37275 for (i = 0; i < nelt; ++i)
37277 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
37278 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
37279 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
37281 for (j = 0; j < eltsz; ++j)
37282 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
37283 used[which] = true;
37286 for (i = 0; i < 2; ++i)
37288 if (!used[2 * i + 1])
37293 vperm = gen_rtx_CONST_VECTOR (V32QImode,
37294 gen_rtvec_v (32, rperm[2 * i + 1]));
37295 vperm = force_reg (V32QImode, vperm);
37296 h[i] = gen_reg_rtx (V32QImode);
37297 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
37298 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
  /* Swap the 128-bit lanes of h[X].  */
37302 for (i = 0; i < 2; ++i)
37304 if (h[i] == NULL_RTX)
37306 op = gen_reg_rtx (V4DImode);
37307 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
37308 const2_rtx, GEN_INT (3), const0_rtx,
37310 h[i] = gen_lowpart (V32QImode, op);
37313 for (i = 0; i < 2; ++i)
37320 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
37321 vperm = force_reg (V32QImode, vperm);
37322 l[i] = gen_reg_rtx (V32QImode);
37323 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
37324 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
37327 for (i = 0; i < 2; ++i)
37331 op = gen_reg_rtx (V32QImode);
37332 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
37339 gcc_assert (l[0] && l[1]);
37340 op = gen_lowpart (V32QImode, d->target);
37341 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
37345 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
37346 With all of the interface bits taken care of, perform the expansion
37347 in D and return true on success. */
37350 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
37352 /* Try a single instruction expansion. */
37353 if (expand_vec_perm_1 (d))
37356 /* Try sequences of two instructions. */
37358 if (expand_vec_perm_pshuflw_pshufhw (d))
37361 if (expand_vec_perm_palignr (d))
37364 if (expand_vec_perm_interleave2 (d))
37367 if (expand_vec_perm_broadcast (d))
37370 if (expand_vec_perm_vpermq_perm_1 (d))
37373 /* Try sequences of three instructions. */
37375 if (expand_vec_perm_pshufb2 (d))
37378 if (expand_vec_perm_interleave3 (d))
37381 /* Try sequences of four instructions. */
37383 if (expand_vec_perm_vpshufb2_vpermq (d))
37386 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
37389 /* ??? Look for narrow permutations whose element orderings would
37390 allow the promotion to a wider mode. */
37392 /* ??? Look for sequences of interleave or a wider permute that place
37393 the data into the correct lanes for a half-vector shuffle like
37394 pshuf[lh]w or vpermilps. */
37396 /* ??? Look for sequences of interleave that produce the desired results.
37397 The combinatorics of punpck[lh] get pretty ugly... */
37399 if (expand_vec_perm_even_odd (d))
37402 /* Even longer sequences. */
37403 if (expand_vec_perm_vpshufb4_vpermq2 (d))
37410 ix86_expand_vec_perm_const (rtx operands[4])
37412 struct expand_vec_perm_d d;
37413 unsigned char perm[MAX_VECT_LEN];
37414 int i, nelt, which;
37417 d.target = operands[0];
37418 d.op0 = operands[1];
37419 d.op1 = operands[2];
37422 d.vmode = GET_MODE (d.target);
37423 gcc_assert (VECTOR_MODE_P (d.vmode));
37424 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37425 d.testing_p = false;
37427 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
37428 gcc_assert (XVECLEN (sel, 0) == nelt);
37429 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
37431 for (i = which = 0; i < nelt; ++i)
37433 rtx e = XVECEXP (sel, 0, i);
37434 int ei = INTVAL (e) & (2 * nelt - 1);
37436 which |= (ei < nelt ? 1 : 2);
37447 if (!rtx_equal_p (d.op0, d.op1))
	  /* The elements of PERM do not suggest that only the first operand
	     is used, but both operands are identical.  Allow easier matching
	     of the permutation by folding the permutation into the single
	     input vector.  */
37454 for (i = 0; i < nelt; ++i)
37455 if (d.perm[i] >= nelt)
37464 for (i = 0; i < nelt; ++i)
37470 if (ix86_expand_vec_perm_const_1 (&d))
37473 /* If the mask says both arguments are needed, but they are the same,
37474 the above tried to expand with d.op0 == d.op1. If that didn't work,
37475 retry with d.op0 != d.op1 as that is what testing has been done with. */
37476 if (which == 3 && d.op0 == d.op1)
37481 memcpy (d.perm, perm, sizeof (perm));
37482 d.op1 = gen_reg_rtx (d.vmode);
37484 ok = ix86_expand_vec_perm_const_1 (&d);
37485 seq = get_insns ();
37489 emit_move_insn (d.op1, d.op0);
37498 /* Implement targetm.vectorize.vec_perm_const_ok. */
37501 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
37502 const unsigned char *sel)
37504 struct expand_vec_perm_d d;
37505 unsigned int i, nelt, which;
37509 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37510 d.testing_p = true;
37512 /* Given sufficient ISA support we can just return true here
37513 for selected vector modes. */
37514 if (GET_MODE_SIZE (d.vmode) == 16)
37516 /* All implementable with a single vpperm insn. */
37519 /* All implementable with 2 pshufb + 1 ior. */
37522 /* All implementable with shufpd or unpck[lh]pd. */
  /* Extract the values from the vector CST into the permutation
     array in D.  */
37529 memcpy (d.perm, sel, nelt);
37530 for (i = which = 0; i < nelt; ++i)
37532 unsigned char e = d.perm[i];
37533 gcc_assert (e < 2 * nelt);
37534 which |= (e < nelt ? 1 : 2);
  /* For all elements from the second vector, fold them into the
     first.  */
37539 for (i = 0; i < nelt; ++i)
37542 /* Check whether the mask can be applied to the vector type. */
37543 one_vec = (which != 3);
37545 /* Implementable with shufps or pshufd. */
37546 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
37549 /* Otherwise we have to go through the motions and see if we can
37550 figure out how to generate the requested permutation. */
37551 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
37552 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
37554 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
37557 ret = ix86_expand_vec_perm_const_1 (&d);
37564 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
37566 struct expand_vec_perm_d d;
37572 d.vmode = GET_MODE (targ);
37573 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37574 d.testing_p = false;
37576 for (i = 0; i < nelt; ++i)
37577 d.perm[i] = i * 2 + odd;
37579 /* We'll either be able to implement the permutation directly... */
37580 if (expand_vec_perm_1 (&d))
37583 /* ... or we use the special-case patterns. */
37584 expand_vec_perm_even_odd_1 (&d, odd);
37587 /* Expand an insert into a vector register through pinsr insn.
37588 Return true if successful. */
37591 ix86_expand_pinsr (rtx *operands)
37593 rtx dst = operands[0];
37594 rtx src = operands[3];
37596 unsigned int size = INTVAL (operands[1]);
37597 unsigned int pos = INTVAL (operands[2]);
37599 if (GET_CODE (dst) == SUBREG)
37601 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
37602 dst = SUBREG_REG (dst);
37605 if (GET_CODE (src) == SUBREG)
37606 src = SUBREG_REG (src);
37608 switch (GET_MODE (dst))
37615 enum machine_mode srcmode, dstmode;
37616 rtx (*pinsr)(rtx, rtx, rtx, rtx);
37618 srcmode = mode_for_size (size, MODE_INT, 0);
37623 if (!TARGET_SSE4_1)
37625 dstmode = V16QImode;
37626 pinsr = gen_sse4_1_pinsrb;
37632 dstmode = V8HImode;
37633 pinsr = gen_sse2_pinsrw;
37637 if (!TARGET_SSE4_1)
37639 dstmode = V4SImode;
37640 pinsr = gen_sse4_1_pinsrd;
37644 gcc_assert (TARGET_64BIT);
37645 if (!TARGET_SSE4_1)
37647 dstmode = V2DImode;
37648 pinsr = gen_sse4_1_pinsrq;
37655 dst = gen_lowpart (dstmode, dst);
      src = gen_lowpart (srcmode, src);

      /* Convert the bit position to an element index.  */
      pos /= size;

      emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
      return true;
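      /* Illustrative example (an editorial addition, not original
         source): inserting a 16-bit value at bit position 32 of a
         V8HImode destination selects element index 32 / 16 == 2, so
         the insn emitted is pinsrw with the vec_merge mask
         (1 << 2).  */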
/* This function returns the calling-ABI-specific va_list type node.
   It returns the FNDECL-specific va_list type.  */
37673 ix86_fn_abi_va_list (tree fndecl)
37676 return va_list_type_node;
37677 gcc_assert (fndecl != NULL_TREE);
37679 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
37680 return ms_va_list_type_node;
37682 return sysv_va_list_type_node;
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */
37689 ix86_canonical_va_list_type (tree type)
37693 /* Resolve references and pointers to va_list type. */
37694 if (TREE_CODE (type) == MEM_REF)
37695 type = TREE_TYPE (type);
37696 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
37697 type = TREE_TYPE (type);
37698 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
37699 type = TREE_TYPE (type);
37701 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
37703 wtype = va_list_type_node;
37704 gcc_assert (wtype != NULL_TREE);
37706 if (TREE_CODE (wtype) == ARRAY_TYPE)
37708 /* If va_list is an array type, the argument may have decayed
37709 to a pointer type, e.g. by being passed to another function.
37710 In that case, unwrap both types so that we can compare the
37711 underlying records. */
37712 if (TREE_CODE (htype) == ARRAY_TYPE
37713 || POINTER_TYPE_P (htype))
37715 wtype = TREE_TYPE (wtype);
37716 htype = TREE_TYPE (htype);
37719 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37720 return va_list_type_node;
37721 wtype = sysv_va_list_type_node;
37722 gcc_assert (wtype != NULL_TREE);
37724 if (TREE_CODE (wtype) == ARRAY_TYPE)
37726 /* If va_list is an array type, the argument may have decayed
37727 to a pointer type, e.g. by being passed to another function.
37728 In that case, unwrap both types so that we can compare the
37729 underlying records. */
37730 if (TREE_CODE (htype) == ARRAY_TYPE
37731 || POINTER_TYPE_P (htype))
37733 wtype = TREE_TYPE (wtype);
37734 htype = TREE_TYPE (htype);
37737 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37738 return sysv_va_list_type_node;
37739 wtype = ms_va_list_type_node;
37740 gcc_assert (wtype != NULL_TREE);
37742 if (TREE_CODE (wtype) == ARRAY_TYPE)
37744 /* If va_list is an array type, the argument may have decayed
37745 to a pointer type, e.g. by being passed to another function.
37746 In that case, unwrap both types so that we can compare the
37747 underlying records. */
37748 if (TREE_CODE (htype) == ARRAY_TYPE
37749 || POINTER_TYPE_P (htype))
37751 wtype = TREE_TYPE (wtype);
37752 htype = TREE_TYPE (htype);
37755 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37756 return ms_va_list_type_node;
37759 return std_canonical_va_list_type (type);
37762 /* Iterate through the target-specific builtin types for va_list.
37763 IDX denotes the iterator, *PTREE is set to the result type of
37764 the va_list builtin, and *PNAME to its internal type.
37765 Returns zero if there is no element for this index, otherwise
37766 IDX should be increased upon the next call.
37767 Note, do not iterate a base builtin's name like __builtin_va_list.
37768 Used from c_common_nodes_and_builtins. */
37771 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
37781 *ptree = ms_va_list_type_node;
37782 *pname = "__builtin_ms_va_list";
37786 *ptree = sysv_va_list_type_node;
37787 *pname = "__builtin_sysv_va_list";
37795 #undef TARGET_SCHED_DISPATCH
37796 #define TARGET_SCHED_DISPATCH has_dispatch
37797 #undef TARGET_SCHED_DISPATCH_DO
37798 #define TARGET_SCHED_DISPATCH_DO do_dispatch
37799 #undef TARGET_SCHED_REASSOCIATION_WIDTH
37800 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
37802 /* The size of the dispatch window is the total number of bytes of
37803 object code allowed in a window. */
37804 #define DISPATCH_WINDOW_SIZE 16
37806 /* Number of dispatch windows considered for scheduling. */
37807 #define MAX_DISPATCH_WINDOWS 3
/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
37816 #define MAX_IMM_SIZE 128
37818 /* Maximum number of 32 bit immediates allowed in a window. */
37819 #define MAX_IMM_32 4
37821 /* Maximum number of 64 bit immediates allowed in a window. */
37822 #define MAX_IMM_64 2
/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
37828 #define MAX_STORE 1
/* Dispatch groups.  Instructions that affect the mix in a dispatch
   window.  */
enum dispatch_group {
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};

#define BIG 100
/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of these kinds of operations does not have any
   effect on the dispatch window, but we need them for other reasons
   in the table.  */
37855 static unsigned int num_allowable_groups[disp_last] = {
37856 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
37859 char group_name[disp_last + 1][16] = {
37860 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
37861 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
37862 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
/* Instruction path.  */
enum insn_path
{
  no_path,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi,  /* Instructions with more than 2 micro ops.  */
  last_path
};
37874 /* sched_insn_info defines a window to the instructions scheduled in
37875 the basic block. It contains a pointer to the insn_info table and
37876 the instruction scheduled.
37878 Windows are allocated for each basic block and are linked
typedef struct sched_insn_info_s {
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;
/* Linked list of dispatch windows.  This is a two-way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
37894 int num_insn; /* Number of insn in the window. */
37895 int num_uops; /* Number of uops in the window. */
37896 int window_size; /* Number of bytes in the window. */
  int window_num;          /* Window number, 0 or 1.  */
37898 int num_imm; /* Number of immediates in an insn. */
37899 int num_imm_32; /* Number of 32 bit immediates in an insn. */
37900 int num_imm_64; /* Number of 64 bit immediates in an insn. */
37901 int imm_size; /* Total immediates in the window. */
37902 int num_loads; /* Total memory loads in the window. */
37903 int num_stores; /* Total memory stores in the window. */
37904 int violation; /* Violation exists in window. */
37905 sched_insn_info *window; /* Pointer to the window. */
37906 struct dispatch_windows_s *next;
37907 struct dispatch_windows_s *prev;
37908 } dispatch_windows;
/* Immediate values used in an insn.  */
37911 typedef struct imm_info_s
37918 static dispatch_windows *dispatch_window_list;
37919 static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}

/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
          || type == TYPE_ICMP
          || type == TYPE_FCMP
          || GET_CODE (PATTERN (insn)) == COMPARE);
}

/* Return true if a dispatch violation was encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}

/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}

/* This function initializes a dispatch window and the list container
   holding a pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}

/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}

/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}

/* This function returns true if a branch is detected.  End of a basic
   block does not have to be a branch, but here we assume only branches
   end a window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}

/* This function is called when the end of a window processing is
   reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
                  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}

/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not the
   dispatch windows whose sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
        init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}

/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
        (imm_values->imm32)++;
      else
        (imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
        {
          (imm_values->imm)++;
          (imm_values->imm32)++;
        }
      break;

    default:
      break;
    }

  return 0;
}

/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
                (rtx_function) find_constant_1, (void *) imm_values);
}

/* Return total size of immediate operands of an instruction along with
   the number of corresponding immediate operands.  It initializes its
   parameters to zero before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of
   64 bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}

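/* Worked example (illustrative): an insn carrying one 32 bit and one
   64 bit immediate yields *imm == 2, *imm32 == 1, *imm64 == 1, and a
   return value of 1*4 + 1*8 == 12 bytes of immediates.  */
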
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                               &num_imm64_operand);
  return false;
}

/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}

/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}

/* Count the number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
          || num_imm_operand + window_list->num_imm > MAX_IMM
          || (num_imm32_operand > 0
              && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
                  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
          || (num_imm64_operand > 0
              && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
                  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
          || (window_list->imm_size + imm_size == MAX_IMM_SIZE
              && num_imm64_operand > 0
              && ((window_list->num_imm_64 > 0
                   && window_list->num_insn >= 2)
                  || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
           || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
           || group == disp_prefetch)
          && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
          && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}

/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in the Haifa scheduler to make sure they will
     be scheduled in the same dispatch window as the reference to
     them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
          || (min_insn_size (insn) + sum) >= 48)
        /* Window 1 is full.  Go for next window.  */
        return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
         uops.  */
      if (path == path_double
          && (window_list->num_uops + 2) > MAX_INSN)
        return false;
      else if (path != path_single)
        return false;
    }
  return true;
}

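/* The magic numbers above match the window geometry: 48 bytes is
   MAX_DISPATCH_WINDOWS * DISPATCH_WINDOW_SIZE, and 32 bytes (two full
   16 byte windows) is the point at which window 1 counts as full.  */
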
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
           || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}

/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed what is allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If the current window is full, get a new window.
     Window number zero is full if MAX_INSN uops are scheduled in it.
     Window number one is full if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total make it greater than 48, or it already has MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !insn_fits)
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          return;
        }
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do the end-basic-block process.  */
      process_end_window ();
      return;
    }
}

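/* Walk-through (illustrative): if window 0 already holds MAX_INSN uops
   when a path_single insn arrives, the capacity test above fails,
   window_num = ~window_num & 1 flips 0 to 1, allocate_next_window links
   window 1 after window 0, and the insn lands in window 1.  Once both
   windows together reach 32 bytes (or adding the insn would reach 48),
   process_end_window resets them and filling restarts at window 0.  */
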
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
           list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *) list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}

/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}

/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
           group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}

/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}

/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}

/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}

/* Implementation of the reassociation_width target hook used by the
   reassoc phase to identify the parallelism level in a reassociated
   tree.  The statement's tree_code is passed in OPC.  The arguments'
   type is passed in MODE.

   Currently parallel reassociation is enabled for Atom processors
   only and we set the reassociation width to be 2 because Atom may
   issue up to 2 instructions per cycle.

   The return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
                          enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}

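/* For example, with a width of 2 the reassoc pass may rewrite
   a + b + c + d as (a + b) + (c + d), exposing two independent
   additions that can issue in parallel.  */
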
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}

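/* E.g. for SFmode this prefers the 256 bit V8SFmode when AVX is
   enabled and 128 bit operation is not preferred, otherwise the
   128 bit V4SFmode; DFmode vectorization can be disabled entirely
   via TARGET_VECTORIZE_DOUBLE.  */
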
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}

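/* The return value is a bitmask of vector sizes in bytes: 32 | 16 asks
   the vectorizer to try 256 bit vectors and fall back to 128 bit,
   while 0 leaves the choice to the preferred SIMD mode alone.  */
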
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"