/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};
typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if the state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee neither returns nor passes 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};

/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
        = (enum upper_128bits_state *) data;
      *state = used;
    }
}
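
/* Illustrative aside (editorial): check_avx256_stores has the
   note_stores callback signature (DEST, SET, DATA).  A sketch of the
   intended use, mirroring the scan loop below:

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);

   After the call, STATE has become `used' if the insn stored to, or
   copied from, a 256bit AVX register.  */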

/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is the state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
                             enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
                 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
                 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              state = unused;
              unchanged = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (state != used)
            {
              note_stores (pat, check_avx256_stores, &state);
              if (state == used)
                unchanged = false;
            }
          continue;
        }

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
          if (avx256 == callee_return_avx256)
            {
              state = used;
              unchanged = false;
            }

          /* Remove unnecessary vzeroupper since upper 128bits are
             cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          /* Set state to UNUSED if callee doesn't return 256bit AVX
             register.  */
          if (avx256 != callee_return_pass_avx256)
            state = unused;

          if (avx256 == callee_return_pass_avx256
              || avx256 == callee_pass_avx256)
            {
              /* Must remove vzeroupper since callee passes in 256bit
                 AVX register.  */
              if (dump_file)
                {
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
              delete_insn (insn);
            }
          else
            {
              vzeroupper_insn = insn;
              unchanged = false;
            }
        }
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
             bb->index, unchanged ? "unchanged" : "changed",
             state);
}

/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
             block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
        continue;
      switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
          if (!unknown_is_unused)
            seen_unknown = true;
          break;
        case unused:
          break;
        case used:
          state = used;
          goto done;
        }
    }

  if (seen_unknown)
    state = unknown;

 done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
        cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}

/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
        move_or_delete_vzeroupper_1 (bb, false);
        fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
        {
          bb = (basic_block) fibheap_extract_min (worklist);
          RESET_BIT (in_worklist, bb->index);
          gcc_assert (!TEST_BIT (visited, bb->index));
          if (!TEST_BIT (visited, bb->index))
            {
              edge_iterator ei;

              SET_BIT (visited, bb->index);

              if (move_or_delete_vzeroupper_1 (bb, false))
                FOR_EACH_EDGE (e, ei, bb->succs)
                  {
                    if (e->dest == EXIT_BLOCK_PTR
                        || BLOCK_INFO (e->dest)->processed)
                      continue;

                    if (TEST_BIT (visited, e->dest->index))
                      {
                        if (!TEST_BIT (in_pending, e->dest->index))
                          {
                            /* Send E->DEST to next round.  */
                            SET_BIT (in_pending, e->dest->index);
                            fibheap_insert (pending,
                                            bb_order[e->dest->index],
                                            e->dest);
                          }
                      }
                    else if (!TEST_BIT (in_worklist, e->dest->index))
                      {
                        /* Add E->DEST to current round.  */
                        SET_BIT (in_worklist, e->dest->index);
                        fibheap_insert (worklist, bb_order[e->dest->index],
                                        e->dest);
                      }
                  }
            }
        }

      if (!cfun->machine->rescan_vzeroupper_p)
        break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
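
/* Illustrative sketch (editorial, not from this file): a minimal driver
   for the pass above, assuming it is wired into the machine-dependent
   reorg pass; the function name here is hypothetical:

     static void
     ix86_reorg_vzeroupper_example (void)
     {
       if (TARGET_VZEROUPPER)
         move_or_delete_vzeroupper ();
     }
*/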

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
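
/* Illustrative aside (editorial): MODE_INDEX picks a slot in the
   5-element multiply and divide cost arrays of struct processor_costs,
   slot 4 being the catch-all "other" entry.  A hypothetical lookup:

     int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   selects the SImode column of the active tuning's multiply-start costs
   (assuming the mult_init field name from struct processor_costs).  */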

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
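
/* Worked example (editorial): with COSTS_N_INSNS (N) == (N) * 4 and an
   add taking 2 bytes, COSTS_N_BYTES puts sizes on the same scale:
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so when tuning for size a
   2-byte add costs exactly one "instruction" unit and byte counts can be
   compared directly against the insn-based entries.  */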

static const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),                    /* cost of an add instruction */
  COSTS_N_BYTES (3),                    /* cost of a lea instruction */
  COSTS_N_BYTES (2),                    /* variable shift costs */
  COSTS_N_BYTES (3),                    /* constant shift costs */
  {COSTS_N_BYTES (3),                   /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),                   /*                               HI */
   COSTS_N_BYTES (3),                   /*                               SI */
   COSTS_N_BYTES (3),                   /*                               DI */
   COSTS_N_BYTES (5)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),                   /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),                   /*                               HI */
   COSTS_N_BYTES (3),                   /*                               SI */
   COSTS_N_BYTES (3),                   /*                               DI */
   COSTS_N_BYTES (5)},                  /*                            other */
  COSTS_N_BYTES (3),                    /* cost of movsx */
  COSTS_N_BYTES (3),                    /* cost of movzx */
  0,                                    /* "large" insn */
  2,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {2, 2, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 2},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {2, 2, 2},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  3,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {3, 3},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  3,                                    /* cost of moving SSE register */
  {3, 3, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {3, 3, 3},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of l1 cache  */
  0,                                    /* size of l2 cache  */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_BYTES (2),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),                    /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  1,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  1,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};
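
/* Editorial note on the stringop tables above and below: each
   stringop_algs initializer reads {alg_for_unknown_size, {{max, alg},
   ...}}; the {max, alg} pairs are tried in increasing size order, a max
   of -1 terminates the list, and the leading member is used when the
   block size is not known at compile time.  The pairs of initializers
   appear to give the 32-bit and 64-bit variants respectively, which is
   why chips without 64-bit support use DUMMY_STRINGOP_ALGS for the
   second one.  */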

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (3),                    /* variable shift costs */
  COSTS_N_INSNS (2),                    /* constant shift costs */
  {COSTS_N_INSNS (6),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),                   /*                               HI */
   COSTS_N_INSNS (6),                   /*                               SI */
   COSTS_N_INSNS (6),                   /*                               DI */
   COSTS_N_INSNS (6)},                  /*                            other */
  COSTS_N_INSNS (1),                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),                  /*                               HI */
   COSTS_N_INSNS (23),                  /*                               SI */
   COSTS_N_INSNS (23),                  /*                               DI */
   COSTS_N_INSNS (23)},                 /*                            other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  15,                                   /* "large" insn */
  3,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of l1 cache  */
  0,                                    /* size of l2 cache  */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (23),                   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),                   /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),                   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),                  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (3),                    /* variable shift costs */
  COSTS_N_INSNS (2),                    /* constant shift costs */
  {COSTS_N_INSNS (12),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),                  /*                               HI */
   COSTS_N_INSNS (12),                  /*                               SI */
   COSTS_N_INSNS (12),                  /*                               DI */
   COSTS_N_INSNS (12)},                 /*                            other */
  1,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),                  /*                               HI */
   COSTS_N_INSNS (40),                  /*                               SI */
   COSTS_N_INSNS (40),                  /*                               DI */
   COSTS_N_INSNS (40)},                 /*                            other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  15,                                   /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  4,                                    /* size of l1 cache.  486 has 8kB cache
                                           shared for code and data, so 4kB is
                                           not really precise.  */
  4,                                    /* size of l2 cache  */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),                   /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (4),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (11),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),                  /*                               HI */
   COSTS_N_INSNS (11),                  /*                               SI */
   COSTS_N_INSNS (11),                  /*                               DI */
   COSTS_N_INSNS (11)},                 /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),                  /*                               HI */
   COSTS_N_INSNS (25),                  /*                               SI */
   COSTS_N_INSNS (25),                  /*                               DI */
   COSTS_N_INSNS (25)},                 /*                            other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  6,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  8,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  8,                                    /* size of l2 cache  */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (4),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (4),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (4)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),                  /*                               HI */
   COSTS_N_INSNS (17),                  /*                               SI */
   COSTS_N_INSNS (17),                  /*                               DI */
   COSTS_N_INSNS (17)},                 /*                            other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  256,                                  /* size of l2 cache  */
  32,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (2),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (7),                   /*                               SI */
   COSTS_N_INSNS (7),                   /*                               DI */
   COSTS_N_INSNS (7)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),                  /*                               HI */
   COSTS_N_INSNS (39),                  /*                               SI */
   COSTS_N_INSNS (39),                  /*                               DI */
   COSTS_N_INSNS (39)},                 /*                            other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* MOVE_RATIO */
  1,                                    /* cost for loading QImode using movzbl */
  {1, 1, 1},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {1, 1, 1},                            /* cost of storing integer registers */
  1,                                    /* cost of reg,reg fld/fst */
  {1, 1, 1},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 6, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */

  1,                                    /* cost of moving MMX register */
  {1, 1},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {1, 1},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  1,                                    /* cost of moving SSE register */
  {1, 1, 1},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {1, 1, 1},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  1,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  128,                                  /* size of l2 cache.  */
  32,                                   /* size of prefetch block */
  1,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (3),                   /*                               DI */
   COSTS_N_INSNS (3)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),                  /*                               HI */
   COSTS_N_INSNS (18),                  /*                               SI */
   COSTS_N_INSNS (18),                  /*                               DI */
   COSTS_N_INSNS (18)},                 /*                            other */
  COSTS_N_INSNS (2),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  8,                                    /* "large" insn */
  32,                                   /* MOVE_RATIO */
  3,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {6, 6, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  6,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  32,                                   /* size of l2 cache.  Some models
                                           have integrated l2 cache, but
                                           optimizing for k6 is not important
                                           enough to worry about that.  */
  32,                                   /* size of prefetch block */
  1,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (2),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (5),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),                   /*                               HI */
   COSTS_N_INSNS (5),                   /*                               SI */
   COSTS_N_INSNS (5),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                               HI */
   COSTS_N_INSNS (42),                  /*                               SI */
   COSTS_N_INSNS (74),                  /*                               DI */
   COSTS_N_INSNS (74)},                 /*                            other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 4},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  5,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     than K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                               HI */
   COSTS_N_INSNS (42),                  /*                               SI */
   COSTS_N_INSNS (74),                  /*                               DI */
   COSTS_N_INSNS (74)},                 /*                            other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 3, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                                  /* number of parallel prefetches */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar load_cost.  */
  2,                                    /* scalar_store_cost.  */
  5,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  3,                                    /* vec_unalign_load_cost.  */
  3,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  2,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),                  /*                               HI */
   COSTS_N_INSNS (51),                  /*                               SI */
   COSTS_N_INSNS (83),                  /*                               DI */
   COSTS_N_INSNS (83)},                 /*                            other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1 */
  64,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                                  /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar load_cost.  */
  2,                                    /* scalar_store_cost.  */
  6,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  2,                                    /* vec_store_cost.  */
  2,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (4),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (4),                   /*                               SI */
   COSTS_N_INSNS (6),                   /*                               DI */
   COSTS_N_INSNS (6)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),                  /*                               HI */
   COSTS_N_INSNS (51),                  /*                               SI */
   COSTS_N_INSNS (83),                  /*                               DI */
   COSTS_N_INSNS (83)},                 /*                            other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {5, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {5, 5, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 4},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 4},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 4},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  2,                                    /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1 */
  16,                                   /* size of l1 cache.  */
  2048,                                 /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                                  /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),                   /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                                    /* scalar_stmt_cost.  */
  4,                                    /* scalar load_cost.  */
  4,                                    /* scalar_store_cost.  */
  6,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  4,                                    /* vec_align_load_cost.  */
  4,                                    /* vec_unalign_load_cost.  */
  4,                                    /* vec_store_cost.  */
  2,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (4),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (4),                   /*                               SI */
   COSTS_N_INSNS (6),                   /*                               DI */
   COSTS_N_INSNS (6)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),                  /*                               HI */
   COSTS_N_INSNS (51),                  /*                               SI */
   COSTS_N_INSNS (83),                  /*                               DI */
   COSTS_N_INSNS (83)},                 /*                            other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {5, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {5, 5, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 4},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 4},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 4},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  2,                                    /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1 */
  16,                                   /* size of l1 cache.  */
  2048,                                 /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                                  /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),                   /* cost of FSQRT instruction.  */

  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                                    /* scalar_stmt_cost.  */
  4,                                    /* scalar load_cost.  */
  4,                                    /* scalar_store_cost.  */
  6,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  4,                                    /* vec_align_load_cost.  */
  4,                                    /* vec_unalign_load_cost.  */
  4,                                    /* vec_store_cost.  */
  2,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),                  /*                               HI */
   COSTS_N_INSNS (51),                  /*                               SI */
   COSTS_N_INSNS (83),                  /*                               DI */
   COSTS_N_INSNS (83)},                 /*                            other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1 */
  32,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  100,                                  /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar load_cost.  */
  2,                                    /* scalar_store_cost.  */
  6,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  2,                                    /* vec_store_cost.  */
  2,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (3),                    /* cost of a lea instruction */
  COSTS_N_INSNS (4),                    /* variable shift costs */
  COSTS_N_INSNS (4),                    /* constant shift costs */
  {COSTS_N_INSNS (15),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),                  /*                               HI */
   COSTS_N_INSNS (15),                  /*                               SI */
   COSTS_N_INSNS (15),                  /*                               DI */
   COSTS_N_INSNS (15)},                 /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),                  /*                               HI */
   COSTS_N_INSNS (56),                  /*                               SI */
   COSTS_N_INSNS (56),                  /*                               DI */
   COSTS_N_INSNS (56)},                 /*                            other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  16,                                   /* "large" insn */
  6,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  12,                                   /* cost of moving SSE register */
  {12, 12, 12},                         /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  10,                                   /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (5),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),                   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (10),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),                  /*                               HI */
   COSTS_N_INSNS (10),                  /*                               SI */
   COSTS_N_INSNS (10),                  /*                               DI */
   COSTS_N_INSNS (10)},                 /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),                  /*                               HI */
   COSTS_N_INSNS (66),                  /*                               SI */
   COSTS_N_INSNS (66),                  /*                               DI */
   COSTS_N_INSNS (66)},                 /*                            other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  16,                                   /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  3,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  6,                                    /* cost of moving MMX register */
  {12, 12},                             /* cost of loading MMX registers
                                           in SImode and DImode */
  {12, 12},                             /* cost of storing MMX registers
                                           in SImode and DImode */
  6,                                    /* cost of moving SSE register */
  {12, 12, 12},                         /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {12, 12, 12},                         /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  8,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  1024,                                 /* size of l2 cache.  */
  128,                                  /* size of prefetch block */
  8,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),                   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};
1656 struct processor_costs atom_cost = {
1657 COSTS_N_INSNS (1), /* cost of an add instruction */
1658 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1659 COSTS_N_INSNS (1), /* variable shift costs */
1660 COSTS_N_INSNS (1), /* constant shift costs */
1661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1662 COSTS_N_INSNS (4), /* HI */
1663 COSTS_N_INSNS (3), /* SI */
1664 COSTS_N_INSNS (4), /* DI */
1665 COSTS_N_INSNS (2)}, /* other */
1666 0, /* cost of multiply per each bit set */
1667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1668 COSTS_N_INSNS (26), /* HI */
1669 COSTS_N_INSNS (42), /* SI */
1670 COSTS_N_INSNS (74), /* DI */
1671 COSTS_N_INSNS (74)}, /* other */
1672 COSTS_N_INSNS (1), /* cost of movsx */
1673 COSTS_N_INSNS (1), /* cost of movzx */
1674 8, /* "large" insn */
1675 17, /* MOVE_RATIO */
1676 4, /* cost for loading QImode using movzbl */
1677 {4, 4, 4}, /* cost of loading integer registers
1678 in QImode, HImode and SImode.
1679 Relative to reg-reg move (2). */
1680 {4, 4, 4}, /* cost of storing integer registers */
1681 4, /* cost of reg,reg fld/fst */
1682 {12, 12, 12}, /* cost of loading fp registers
1683 in SFmode, DFmode and XFmode */
1684 {6, 6, 8}, /* cost of storing fp registers
1685 in SFmode, DFmode and XFmode */
1686 2, /* cost of moving MMX register */
1687 {8, 8}, /* cost of loading MMX registers
1688 in SImode and DImode */
1689 {8, 8}, /* cost of storing MMX registers
1690 in SImode and DImode */
1691 2, /* cost of moving SSE register */
1692 {8, 8, 8}, /* cost of loading SSE registers
1693 in SImode, DImode and TImode */
1694 {8, 8, 8}, /* cost of storing SSE registers
1695 in SImode, DImode and TImode */
1696 5, /* MMX or SSE register to integer */
1697 32, /* size of l1 cache. */
1698 256, /* size of l2 cache. */
1699 64, /* size of prefetch block */
1700 6, /* number of parallel prefetches */
1701 3, /* Branch cost */
1702 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1703 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1704 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1705 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1706 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1707 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1708 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1709 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1710 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1711 {{libcall, {{8, loop}, {15, unrolled_loop},
1712 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1713 {libcall, {{24, loop}, {32, unrolled_loop},
1714 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1715 1, /* scalar_stmt_cost. */
1716 1, /* scalar load_cost. */
1717 1, /* scalar_store_cost. */
1718 1, /* vec_stmt_cost. */
1719 1, /* vec_to_scalar_cost. */
1720 1, /* scalar_to_vec_cost. */
1721 1, /* vec_align_load_cost. */
1722 2, /* vec_unalign_load_cost. */
1723 1, /* vec_store_cost. */
1724 3, /* cond_taken_branch_cost. */
1725 1, /* cond_not_taken_branch_cost. */
1726 };
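/* Editor's note -- illustrative sketch, not GCC source: the two
   initializers near the end of each cost table describe memcpy and
   memset strategies as {max_size, algorithm} steps terminated by a -1
   entry; the first step whose max covers the requested size wins
   (GCC's real selection is done in decide_alg later in this file).
   The type and helper below are hypothetical and kept out of the
   build.  */
#if 0
struct example_stringop_step { int max; int alg; };

static int
example_pick_stringop_alg (const struct example_stringop_step *steps,
			   int size)
{
  int i = 0;
  /* A max of -1 marks the final, catch-all step.  */
  while (steps[i].max != -1 && size > steps[i].max)
    i++;
  return steps[i].alg;
}
#endif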
1728 /* Generic64 should produce code tuned for Nocona and K8. */
1729 static const
1730 struct processor_costs generic64_cost = {
1731 COSTS_N_INSNS (1), /* cost of an add instruction */
1732 /* On all chips taken into consideration lea is 2 cycles and more. With
1733 this cost however our current implementation of synth_mult results in
1734 use of unnecessary temporary registers causing regression on several
1735 SPECfp benchmarks. */
1736 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1737 COSTS_N_INSNS (1), /* variable shift costs */
1738 COSTS_N_INSNS (1), /* constant shift costs */
1739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1740 COSTS_N_INSNS (4), /* HI */
1741 COSTS_N_INSNS (3), /* SI */
1742 COSTS_N_INSNS (4), /* DI */
1743 COSTS_N_INSNS (2)}, /* other */
1744 0, /* cost of multiply per each bit set */
1745 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1746 COSTS_N_INSNS (26), /* HI */
1747 COSTS_N_INSNS (42), /* SI */
1748 COSTS_N_INSNS (74), /* DI */
1749 COSTS_N_INSNS (74)}, /* other */
1750 COSTS_N_INSNS (1), /* cost of movsx */
1751 COSTS_N_INSNS (1), /* cost of movzx */
1752 8, /* "large" insn */
1753 17, /* MOVE_RATIO */
1754 4, /* cost for loading QImode using movzbl */
1755 {4, 4, 4}, /* cost of loading integer registers
1756 in QImode, HImode and SImode.
1757 Relative to reg-reg move (2). */
1758 {4, 4, 4}, /* cost of storing integer registers */
1759 4, /* cost of reg,reg fld/fst */
1760 {12, 12, 12}, /* cost of loading fp registers
1761 in SFmode, DFmode and XFmode */
1762 {6, 6, 8}, /* cost of storing fp registers
1763 in SFmode, DFmode and XFmode */
1764 2, /* cost of moving MMX register */
1765 {8, 8}, /* cost of loading MMX registers
1766 in SImode and DImode */
1767 {8, 8}, /* cost of storing MMX registers
1768 in SImode and DImode */
1769 2, /* cost of moving SSE register */
1770 {8, 8, 8}, /* cost of loading SSE registers
1771 in SImode, DImode and TImode */
1772 {8, 8, 8}, /* cost of storing SSE registers
1773 in SImode, DImode and TImode */
1774 5, /* MMX or SSE register to integer */
1775 32, /* size of l1 cache. */
1776 512, /* size of l2 cache. */
1777 64, /* size of prefetch block */
1778 6, /* number of parallel prefetches */
1779 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1780 value is increased to a perhaps more appropriate value of 5. */
1781 3, /* Branch cost */
1782 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1783 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1784 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1785 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1786 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1787 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1788 {DUMMY_STRINGOP_ALGS,
1789 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1790 {DUMMY_STRINGOP_ALGS,
1791 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1792 1, /* scalar_stmt_cost. */
1793 1, /* scalar load_cost. */
1794 1, /* scalar_store_cost. */
1795 1, /* vec_stmt_cost. */
1796 1, /* vec_to_scalar_cost. */
1797 1, /* scalar_to_vec_cost. */
1798 1, /* vec_align_load_cost. */
1799 2, /* vec_unalign_load_cost. */
1800 1, /* vec_store_cost. */
1801 3, /* cond_taken_branch_cost. */
1802 1, /* cond_not_taken_branch_cost. */
1803 };
1805 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1806 Athlon and K8. */
1807 static const
1808 struct processor_costs generic32_cost = {
1809 COSTS_N_INSNS (1), /* cost of an add instruction */
1810 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1811 COSTS_N_INSNS (1), /* variable shift costs */
1812 COSTS_N_INSNS (1), /* constant shift costs */
1813 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1814 COSTS_N_INSNS (4), /* HI */
1815 COSTS_N_INSNS (3), /* SI */
1816 COSTS_N_INSNS (4), /* DI */
1817 COSTS_N_INSNS (2)}, /* other */
1818 0, /* cost of multiply per each bit set */
1819 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1820 COSTS_N_INSNS (26), /* HI */
1821 COSTS_N_INSNS (42), /* SI */
1822 COSTS_N_INSNS (74), /* DI */
1823 COSTS_N_INSNS (74)}, /* other */
1824 COSTS_N_INSNS (1), /* cost of movsx */
1825 COSTS_N_INSNS (1), /* cost of movzx */
1826 8, /* "large" insn */
1827 17, /* MOVE_RATIO */
1828 4, /* cost for loading QImode using movzbl */
1829 {4, 4, 4}, /* cost of loading integer registers
1830 in QImode, HImode and SImode.
1831 Relative to reg-reg move (2). */
1832 {4, 4, 4}, /* cost of storing integer registers */
1833 4, /* cost of reg,reg fld/fst */
1834 {12, 12, 12}, /* cost of loading fp registers
1835 in SFmode, DFmode and XFmode */
1836 {6, 6, 8}, /* cost of storing fp registers
1837 in SFmode, DFmode and XFmode */
1838 2, /* cost of moving MMX register */
1839 {8, 8}, /* cost of loading MMX registers
1840 in SImode and DImode */
1841 {8, 8}, /* cost of storing MMX registers
1842 in SImode and DImode */
1843 2, /* cost of moving SSE register */
1844 {8, 8, 8}, /* cost of loading SSE registers
1845 in SImode, DImode and TImode */
1846 {8, 8, 8}, /* cost of storing SSE registers
1847 in SImode, DImode and TImode */
1848 5, /* MMX or SSE register to integer */
1849 32, /* size of l1 cache. */
1850 256, /* size of l2 cache. */
1851 64, /* size of prefetch block */
1852 6, /* number of parallel prefetches */
1853 3, /* Branch cost */
1854 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1855 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1856 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1857 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1858 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1859 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1860 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1861 DUMMY_STRINGOP_ALGS},
1862 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1863 DUMMY_STRINGOP_ALGS},
1864 1, /* scalar_stmt_cost. */
1865 1, /* scalar load_cost. */
1866 1, /* scalar_store_cost. */
1867 1, /* vec_stmt_cost. */
1868 1, /* vec_to_scalar_cost. */
1869 1, /* scalar_to_vec_cost. */
1870 1, /* vec_align_load_cost. */
1871 2, /* vec_unalign_load_cost. */
1872 1, /* vec_store_cost. */
1873 3, /* cond_taken_branch_cost. */
1874 1, /* cond_not_taken_branch_cost. */
1875 };
1877 const struct processor_costs *ix86_cost = &pentium_cost;
1879 /* Processor feature/optimization bitmasks. */
1880 #define m_386 (1<<PROCESSOR_I386)
1881 #define m_486 (1<<PROCESSOR_I486)
1882 #define m_PENT (1<<PROCESSOR_PENTIUM)
1883 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1884 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1885 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1886 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1887 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1888 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1889 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1890 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1891 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1892 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1893 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1894 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1895 #define m_ATOM (1<<PROCESSOR_ATOM)
1897 #define m_GEODE (1<<PROCESSOR_GEODE)
1898 #define m_K6 (1<<PROCESSOR_K6)
1899 #define m_K6_GEODE (m_K6 | m_GEODE)
1900 #define m_K8 (1<<PROCESSOR_K8)
1901 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1902 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1903 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1904 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1905 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1906 #define m_BDVER (m_BDVER1 | m_BDVER2)
1907 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1908 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1910 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1911 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1913 /* Generic instruction choice should be a common subset of supported CPUs
1914 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1915 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
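/* Editor's note -- illustrative sketch, not GCC source: each m_* macro
   above is one bit per PROCESSOR_* value, so every tuning entry in the
   tables below is simply a set of processors, and membership is a
   single AND against the currently selected CPU's bit.  Hypothetical
   helper, kept out of the build.  */
#if 0
static int
example_entry_applies (unsigned int entry_mask, enum processor_type cpu)
{
  /* e.g. example_entry_applies (m_GENERIC, PROCESSOR_GENERIC64) != 0.  */
  return (entry_mask & (1u << cpu)) != 0;
}
#endif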
1917 /* Feature tests against the various tunings. */
1918 unsigned char ix86_tune_features[X86_TUNE_LAST];
1920 /* Feature tests against the various tunings used to create ix86_tune_features
1921 based on the processor mask. */
1922 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1923 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1924 negatively, so enabling it for Generic64 seems like a good code-size
1925 tradeoff. We can't enable it for 32bit generic because it does not
1926 work well with PPro base chips. */
1927 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1929 /* X86_TUNE_PUSH_MEMORY */
1930 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1932 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1933 m_486 | m_PENT,
1935 /* X86_TUNE_UNROLL_STRLEN */
1936 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1938 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1939 on simulation results. But after P4 was made, no performance benefit
1940 was observed with branch hints. It also increases the code size.
1941 As a result, icc never generates branch hints. */
1942 0,
1944 /* X86_TUNE_DOUBLE_WITH_ADD */
1945 ~m_386,
1947 /* X86_TUNE_USE_SAHF */
1948 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1950 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1951 partial dependencies. */
1952 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1954 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1955 register stalls on Generic32 compilation setting as well. However,
1956 in the current implementation the partial register stalls are not
1957 eliminated very well - they can be introduced via subregs synthesized
1958 by combine and can happen in caller/callee saving sequences. Because
1959 this option pays back little on PPro based chips and is in conflict with
1960 partial reg dependencies used by Athlon/P4 based chips, it is better
1961 to leave it off for generic32 for now. */
1962 m_PPRO,
1964 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1965 m_CORE2I7 | m_GENERIC,
1967 /* X86_TUNE_USE_HIMODE_FIOP */
1968 m_386 | m_486 | m_K6_GEODE,
1970 /* X86_TUNE_USE_SIMODE_FIOP */
1971 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1973 /* X86_TUNE_USE_MOV0 */
1974 m_K6,
1976 /* X86_TUNE_USE_CLTD */
1977 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1979 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1980 m_PENT4,
1982 /* X86_TUNE_SPLIT_LONG_MOVES */
1983 m_PPRO,
1985 /* X86_TUNE_READ_MODIFY_WRITE */
1986 ~m_PENT,
1988 /* X86_TUNE_READ_MODIFY */
1989 ~(m_PENT | m_PPRO),
1991 /* X86_TUNE_PROMOTE_QIMODE */
1992 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1994 /* X86_TUNE_FAST_PREFIX */
1995 ~(m_386 | m_486 | m_PENT),
1997 /* X86_TUNE_SINGLE_STRINGOP */
1998 m_386 | m_P4_NOCONA,
2000 /* X86_TUNE_QIMODE_MATH */
2001 ~0,
2003 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2004 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2005 might be considered for Generic32 if our scheme for avoiding partial
2006 stalls was more effective. */
2007 ~(m_PPRO | m_P4_NOCONA),
2009 /* X86_TUNE_PROMOTE_QI_REGS */
2010 0,
2012 /* X86_TUNE_PROMOTE_HI_REGS */
2013 m_PPRO,
2015 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2016 over esp addition. */
2017 m_386 | m_486 | m_PENT | m_PPRO,
2019 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2020 over esp addition. */
2021 m_386,
2023 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2024 over esp subtraction. */
2025 m_386 | m_486 | m_PENT | m_K6_GEODE,
2027 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2028 over esp subtraction. */
2029 m_PENT | m_K6_GEODE,
2031 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2032 for DFmode copies */
2033 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2035 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2036 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2038 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2039 conflict here between PPro/Pentium4 based chips that treat 128bit
2040 SSE registers as single units versus K8 based chips that divide SSE
2041 registers into two 64bit halves. This knob promotes all store destinations
2042 to be 128bit to allow register renaming on 128bit SSE units, but usually
2043 results in one extra microop on 64bit SSE units. Experimental results
2044 show that disabling this option on P4 brings over 20% SPECfp regression,
2045 while enabling it on K8 brings roughly 2.4% regression that can be partly
2046 masked by careful scheduling of moves. */
2047 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2049 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2050 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2052 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2053 m_COREI7 | m_BDVER,
2055 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2056 m_BDVER,
2058 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2059 are resolved on SSE register parts instead of whole registers, so we may
2060 maintain just lower part of scalar values in proper format leaving the
2061 upper part undefined. */
2062 m_ATHLON_K8,
2064 /* X86_TUNE_SSE_TYPELESS_STORES */
2065 m_AMD_MULTIPLE,
2067 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2068 m_PPRO | m_P4_NOCONA,
2070 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2071 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2073 /* X86_TUNE_PROLOGUE_USING_MOVE */
2074 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2076 /* X86_TUNE_EPILOGUE_USING_MOVE */
2077 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2079 /* X86_TUNE_SHIFT1 */
2080 ~m_486,
2082 /* X86_TUNE_USE_FFREEP */
2083 m_AMD_MULTIPLE,
2085 /* X86_TUNE_INTER_UNIT_MOVES */
2086 ~(m_AMD_MULTIPLE | m_GENERIC),
2088 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2089 ~(m_AMDFAM10 | m_BDVER),
2091 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2092 than 4 branch instructions in the 16 byte window. */
2093 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2095 /* X86_TUNE_SCHEDULE */
2096 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2098 /* X86_TUNE_USE_BT */
2099 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2101 /* X86_TUNE_USE_INCDEC */
2102 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2104 /* X86_TUNE_PAD_RETURNS */
2105 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2107 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */
2108 m_ATOM,
2110 /* X86_TUNE_EXT_80387_CONSTANTS */
2111 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2113 /* X86_TUNE_SHORTEN_X87_SSE */
2114 ~m_K8,
2116 /* X86_TUNE_AVOID_VECTOR_DECODE */
2117 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2119 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
2120 HImode and SImode multiply, but 386 and 486 do HImode multiply faster. */
2121 ~(m_386 | m_486),
2123 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2124 vector path on AMD machines. */
2125 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2127 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2128 machines. */
2129 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2131 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2132 than a MOV. */
2133 m_PENT,
2135 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2136 but one byte longer. */
2137 m_PENT,
2139 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2140 operand that cannot be represented using a modRM byte. The XOR
2141 replacement is long decoded, so this split helps here as well. */
2142 m_K6,
2144 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2145 from FP to FP. */
2146 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2148 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2149 from integer to FP. */
2150 m_AMDFAM10,
2152 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2153 with a subsequent conditional jump instruction into a single
2154 compare-and-branch uop. */
2155 m_BDVER,
2157 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2158 will impact LEA instruction selection. */
2159 m_ATOM,
2161 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2162 instructions. */
2163 ~m_ATOM,
2165 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2166 at -O3. For the moment, the prefetching seems badly tuned for Intel
2167 chips. */
2168 m_K6_GEODE | m_AMD_MULTIPLE,
2170 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2171 the auto-vectorizer. */
2172 m_BDVER,
2174 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2175 during reassociation of integer computation. */
2176 m_ATOM,
2178 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2179 during reassociation of fp computation. */
2180 m_ATOM
2181 };
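/* Editor's note -- illustrative sketch, not GCC source: once
   ix86_option_override_internal has collapsed each row of the table
   above into one boolean per X86_TUNE_* index, querying a tuning knob
   is a plain array access; i386.h wraps these accesses in TARGET_*
   macros.  The macro name below is hypothetical and kept out of the
   build.  */
#if 0
#define EXAMPLE_TARGET_USE_INCDEC \
  (ix86_tune_features[X86_TUNE_USE_INCDEC])
#endif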
2183 /* Feature tests against the various architecture variations. */
2184 unsigned char ix86_arch_features[X86_ARCH_LAST];
2186 /* Feature tests against the various architecture variations, used to create
2187 ix86_arch_features based on the processor mask. */
2188 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2189 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2190 ~(m_386 | m_486 | m_PENT | m_K6),
2192 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2193 ~m_386,
2195 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2196 ~(m_386 | m_486),
2198 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2199 ~m_386,
2201 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2202 ~m_386,
2203 };
2205 static const unsigned int x86_accumulate_outgoing_args
2206 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2208 static const unsigned int x86_arch_always_fancy_math_387
2209 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2211 static const unsigned int x86_avx256_split_unaligned_load
2212 = m_COREI7 | m_GENERIC;
2214 static const unsigned int x86_avx256_split_unaligned_store
2215 = m_COREI7 | m_BDVER | m_GENERIC;
2217 /* In case the average insn count for single function invocation is
2218 lower than this constant, emit fast (but longer) prologue and
2219 epilogue code. */
2220 #define FAST_PROLOGUE_INSN_COUNT 20
2222 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2223 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2224 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2225 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
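/* Editor's note -- illustrative sketch, not GCC source: all three name
   tables are indexed by hard register number, so one regno yields
   "al", "ah" or "ax" depending on which table (and therefore which
   access width) the printing code picks.  Hypothetical helper, kept
   out of the build.  */
#if 0
static const char *
example_qi_name (int regno, int high_part)
{
  return high_part ? qi_high_reg_name[regno] : qi_reg_name[regno];
}
#endif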
2227 /* Array of the smallest class containing reg number REGNO, indexed by
2228 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2230 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2231 {
2232 /* ax, dx, cx, bx */
2233 AREG, DREG, CREG, BREG,
2234 /* si, di, bp, sp */
2235 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2236 /* FP registers */
2237 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2238 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2239 /* arg pointer */
2240 NON_Q_REGS,
2241 /* flags, fpsr, fpcr, frame */
2242 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2243 /* SSE registers */
2244 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2245 SSE_REGS, SSE_REGS,
2246 /* MMX registers */
2247 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2248 MMX_REGS, MMX_REGS,
2249 /* REX registers */
2250 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2251 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2252 /* SSE REX registers */
2253 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2254 SSE_REGS, SSE_REGS,
2255 };
2257 /* The "default" register map used in 32bit mode. */
2259 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2260 {
2261 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2262 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2263 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2264 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2265 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2268 };
2270 /* The "default" register map used in 64bit mode. */
2272 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2273 {
2274 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2275 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2276 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2277 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2278 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2279 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2280 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2281 };
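/* Editor's note -- illustrative sketch, not GCC source: both maps
   translate a gcc hard register number into the numbering the debug
   format expects, with -1 marking registers that cannot be described.
   Hypothetical helper, kept out of the build.  */
#if 0
static int
example_debug_regno (int regno, int for_64bit)
{
  int r = (for_64bit ? dbx64_register_map : dbx_register_map)[regno];
  return r;  /* -1 means "no debug-info encoding for this register".  */
}
#endif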
2283 /* Define the register numbers to be used in Dwarf debugging information.
2284 The SVR4 reference port C compiler uses the following register numbers
2285 in its Dwarf output code:
2286 0 for %eax (gcc regno = 0)
2287 1 for %ecx (gcc regno = 2)
2288 2 for %edx (gcc regno = 1)
2289 3 for %ebx (gcc regno = 3)
2290 4 for %esp (gcc regno = 7)
2291 5 for %ebp (gcc regno = 6)
2292 6 for %esi (gcc regno = 4)
2293 7 for %edi (gcc regno = 5)
2294 The following three DWARF register numbers are never generated by
2295 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2296 believes these numbers have these meanings.
2297 8 for %eip (no gcc equivalent)
2298 9 for %eflags (gcc regno = 17)
2299 10 for %trapno (no gcc equivalent)
2300 It is not at all clear how we should number the FP stack registers
2301 for the x86 architecture. If the version of SDB on x86/svr4 were
2302 a bit less brain dead with respect to floating-point then we would
2303 have a precedent to follow with respect to DWARF register numbers
2304 for x86 FP registers, but the SDB on x86/svr4 is so completely
2305 broken with respect to FP registers that it is hardly worth thinking
2306 of it as something to strive for compatibility with.
2307 The version of x86/svr4 SDB I have at the moment does (partially)
2308 seem to believe that DWARF register number 11 is associated with
2309 the x86 register %st(0), but that's about all. Higher DWARF
2310 register numbers don't seem to be associated with anything in
2311 particular, and even for DWARF regno 11, SDB only seems to under-
2312 stand that it should say that a variable lives in %st(0) (when
2313 asked via an `=' command) if we said it was in DWARF regno 11,
2314 but SDB still prints garbage when asked for the value of the
2315 variable in question (via a `/' command).
2316 (Also note that the labels SDB prints for various FP stack regs
2317 when doing an `x' command are all wrong.)
2318 Note that these problems generally don't affect the native SVR4
2319 C compiler because it doesn't allow the use of -O with -g and
2320 because when it is *not* optimizing, it allocates a memory
2321 location for each floating-point variable, and the memory
2322 location is what gets described in the DWARF AT_location
2323 attribute for the variable in question.
2324 Regardless of the severe mental illness of the x86/svr4 SDB, we
2325 do something sensible here and we use the following DWARF
2326 register numbers. Note that these are all stack-top-relative
2327 numbers.
2328 11 for %st(0) (gcc regno = 8)
2329 12 for %st(1) (gcc regno = 9)
2330 13 for %st(2) (gcc regno = 10)
2331 14 for %st(3) (gcc regno = 11)
2332 15 for %st(4) (gcc regno = 12)
2333 16 for %st(5) (gcc regno = 13)
2334 17 for %st(6) (gcc regno = 14)
2335 18 for %st(7) (gcc regno = 15)
2336 */
2337 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2338 {
2339 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2340 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2341 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2342 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2343 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2344 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2345 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2346 };
2348 /* Define parameter passing and return registers. */
2350 static int const x86_64_int_parameter_registers[6] =
2351 {
2352 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2353 };
2355 static int const x86_64_ms_abi_int_parameter_registers[4] =
2356 {
2357 CX_REG, DX_REG, R8_REG, R9_REG
2358 };
2360 static int const x86_64_int_return_registers[4] =
2361 {
2362 AX_REG, DX_REG, DI_REG, SI_REG
2363 };
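/* Editor's note -- illustrative sketch, not GCC source: argument
   passing walks these arrays in order, so the Nth integer argument
   lands in the Nth entry, and the MS ABI simply uses its shorter
   4-entry table (the real logic threads through ix86_function_arg).
   Hypothetical helper, kept out of the build.  */
#if 0
static int
example_int_arg_regno (int n, int ms_abi)
{
  if (ms_abi)
    return n < 4 ? x86_64_ms_abi_int_parameter_registers[n] : -1;
  return n < 6 ? x86_64_int_parameter_registers[n] : -1;  /* -1: stack */
}
#endif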
2365 /* Define the structure for the machine field in struct function. */
2367 struct GTY(()) stack_local_entry {
2368 unsigned short mode;
2369 unsigned short n;
2370 rtx rtl;
2371 struct stack_local_entry *next;
2372 };
2374 /* Structure describing stack frame layout.
2375 Stack grows downward:
2377 [arguments]
2378 <- ARG_POINTER
2379 saved pc
2381 saved static chain if ix86_static_chain_on_stack
2383 saved frame pointer if frame_pointer_needed
2384 <- HARD_FRAME_POINTER
2385 [saved regs]
2386 <- reg_save_offset
2387 [padding0]
2389 [saved SSE regs]
2390 <- sse_regs_save_offset
2391 [padding1] |
2392 | <- FRAME_POINTER
2393 [va_arg registers] |
2394 |
2395 [frame] |
2396 |
2397 [padding2] | = to_allocate
2398 <- STACK_POINTER
2399 */
2400 struct ix86_frame
2401 {
2402 int nsseregs;
2403 int nregs;
2404 int va_arg_size;
2405 int red_zone_size;
2406 int outgoing_arguments_size;
2407 HOST_WIDE_INT frame;
2409 /* The offsets relative to ARG_POINTER. */
2410 HOST_WIDE_INT frame_pointer_offset;
2411 HOST_WIDE_INT hard_frame_pointer_offset;
2412 HOST_WIDE_INT stack_pointer_offset;
2413 HOST_WIDE_INT hfp_save_offset;
2414 HOST_WIDE_INT reg_save_offset;
2415 HOST_WIDE_INT sse_reg_save_offset;
2417 /* When save_regs_using_mov is set, emit prologue using
2418 move instead of push instructions. */
2419 bool save_regs_using_mov;
2420 };
2422 /* Which cpu are we scheduling for. */
2423 enum attr_cpu ix86_schedule;
2425 /* Which cpu are we optimizing for. */
2426 enum processor_type ix86_tune;
2428 /* Which instruction set architecture to use. */
2429 enum processor_type ix86_arch;
2431 /* true if sse prefetch instruction is not NOOP. */
2432 int x86_prefetch_sse;
2434 /* -mstackrealign option */
2435 static const char ix86_force_align_arg_pointer_string[]
2436 = "force_align_arg_pointer";
2438 static rtx (*ix86_gen_leave) (void);
2439 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2440 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2441 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2442 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2443 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2444 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2445 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2446 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2447 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
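/* Editor's note -- illustrative sketch, not GCC source: the function
   pointers above are filled in once the word size is known, so the
   rest of the backend can emit, say, "an add" without re-testing
   TARGET_64BIT at every call site.  The setup sketched below mirrors
   the kind of assignments done in ix86_option_override_internal and is
   kept out of the build.  */
#if 0
static void
example_init_gen_pointers (void)
{
  if (TARGET_64BIT)
    ix86_gen_add3 = gen_adddi3;		/* DImode word size */
  else
    ix86_gen_add3 = gen_addsi3;		/* SImode word size */
}
#endif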
2449 /* Preferred alignment for stack boundary in bits. */
2450 unsigned int ix86_preferred_stack_boundary;
2452 /* Alignment for incoming stack boundary in bits specified at
2453 command line. */
2454 static unsigned int ix86_user_incoming_stack_boundary;
2456 /* Default alignment for incoming stack boundary in bits. */
2457 static unsigned int ix86_default_incoming_stack_boundary;
2459 /* Alignment for incoming stack boundary in bits. */
2460 unsigned int ix86_incoming_stack_boundary;
2462 /* Calling abi specific va_list type nodes. */
2463 static GTY(()) tree sysv_va_list_type_node;
2464 static GTY(()) tree ms_va_list_type_node;
2466 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2467 char internal_label_prefix[16];
2468 int internal_label_prefix_len;
2470 /* Fence to use after loop using movnt. */
2471 tree x86_mfence;
2473 /* Register class used for passing given 64bit part of the argument.
2474 These represent classes as documented by the psABI, with the exception
2475 of SSESF, SSEDF classes, which are basically SSE class; gcc just uses
2476 SF or DFmode moves instead of DImode moves to avoid reformatting
2477 penalties.
2478 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2479 whenever possible (upper half does contain padding). */
2480 enum x86_64_reg_class
2481 {
2482 X86_64_NO_CLASS,
2483 X86_64_INTEGER_CLASS,
2484 X86_64_INTEGERSI_CLASS,
2485 X86_64_SSE_CLASS,
2486 X86_64_SSESF_CLASS,
2487 X86_64_SSEDF_CLASS,
2488 X86_64_SSEUP_CLASS,
2489 X86_64_X87_CLASS,
2490 X86_64_X87UP_CLASS,
2491 X86_64_COMPLEX_X87_CLASS,
2492 X86_64_MEMORY_CLASS
2493 };
2495 #define MAX_CLASSES 4
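/* Editor's note -- illustrative sketch, not GCC source: classification
   works on 8-byte chunks ("eightbytes"), and MAX_CLASSES is 4 because
   the largest value passed in registers under this scheme is 32 bytes
   (a 256-bit AVX vector), i.e. 32 / 8 = 4 class slots.  GCC's real
   classifier is classify_argument; the helper below is hypothetical
   and kept out of the build.  */
#if 0
static int
example_num_eightbytes (int size_in_bytes)
{
  int words = (size_in_bytes + 7) / 8;	/* one class per eightbyte */
  return words <= MAX_CLASSES ? words : 0;	/* 0: pass in memory */
}
#endif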
2497 /* Table of constants used by fldpi, fldln2, etc.... */
2498 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2499 static bool ext_80387_constants_init = 0;
2502 static struct machine_function * ix86_init_machine_status (void);
2503 static rtx ix86_function_value (const_tree, const_tree, bool);
2504 static bool ix86_function_value_regno_p (const unsigned int);
2505 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2506 const_tree);
2507 static rtx ix86_static_chain (const_tree, bool);
2508 static int ix86_function_regparm (const_tree, const_tree);
2509 static void ix86_compute_frame_layout (struct ix86_frame *);
2510 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2511 rtx, rtx, int);
2512 static void ix86_add_new_builtins (HOST_WIDE_INT);
2513 static tree ix86_canonical_va_list_type (tree);
2514 static void predict_jump (int);
2515 static unsigned int split_stack_prologue_scratch_regno (void);
2516 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2518 enum ix86_function_specific_strings
2519 {
2520 IX86_FUNCTION_SPECIFIC_ARCH,
2521 IX86_FUNCTION_SPECIFIC_TUNE,
2522 IX86_FUNCTION_SPECIFIC_MAX
2523 };
2525 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2526 const char *, enum fpmath_unit, bool);
2527 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2528 static void ix86_function_specific_save (struct cl_target_option *);
2529 static void ix86_function_specific_restore (struct cl_target_option *);
2530 static void ix86_function_specific_print (FILE *, int,
2531 struct cl_target_option *);
2532 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2533 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2534 struct gcc_options *);
2535 static bool ix86_can_inline_p (tree, tree);
2536 static void ix86_set_current_function (tree);
2537 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2539 static enum calling_abi ix86_function_abi (const_tree);
2542 #ifndef SUBTARGET32_DEFAULT_CPU
2543 #define SUBTARGET32_DEFAULT_CPU "i386"
2544 #endif
2546 /* The svr4 ABI for the i386 says that records and unions are returned
2547 in memory. */
2548 #ifndef DEFAULT_PCC_STRUCT_RETURN
2549 #define DEFAULT_PCC_STRUCT_RETURN 1
2550 #endif
2552 /* Whether -mtune= or -march= were specified */
2553 static int ix86_tune_defaulted;
2554 static int ix86_arch_specified;
2556 /* Vectorization library interface and handlers. */
2557 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2559 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2560 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2562 /* Processor target table, indexed by processor number */
2563 struct ptt
2564 {
2565 const struct processor_costs *cost; /* Processor costs */
2566 const int align_loop; /* Default alignments. */
2567 const int align_loop_max_skip;
2568 const int align_jump;
2569 const int align_jump_max_skip;
2570 const int align_func;
2571 };
2573 static const struct ptt processor_target_table[PROCESSOR_max] =
2574 {
2575 {&i386_cost, 4, 3, 4, 3, 4},
2576 {&i486_cost, 16, 15, 16, 15, 16},
2577 {&pentium_cost, 16, 7, 16, 7, 16},
2578 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2579 {&geode_cost, 0, 0, 0, 0, 0},
2580 {&k6_cost, 32, 7, 32, 7, 32},
2581 {&athlon_cost, 16, 7, 16, 7, 16},
2582 {&pentium4_cost, 0, 0, 0, 0, 0},
2583 {&k8_cost, 16, 7, 16, 7, 16},
2584 {&nocona_cost, 0, 0, 0, 0, 0},
2585 /* Core 2 32-bit. */
2586 {&generic32_cost, 16, 10, 16, 10, 16},
2587 /* Core 2 64-bit. */
2588 {&generic64_cost, 16, 10, 16, 10, 16},
2589 /* Core i7 32-bit. */
2590 {&generic32_cost, 16, 10, 16, 10, 16},
2591 /* Core i7 64-bit. */
2592 {&generic64_cost, 16, 10, 16, 10, 16},
2593 {&generic32_cost, 16, 7, 16, 7, 16},
2594 {&generic64_cost, 16, 10, 16, 10, 16},
2595 {&amdfam10_cost, 32, 24, 32, 7, 32},
2596 {&bdver1_cost, 32, 24, 32, 7, 32},
2597 {&bdver2_cost, 32, 24, 32, 7, 32},
2598 {&btver1_cost, 32, 24, 32, 7, 32},
2599 {&atom_cost, 16, 15, 16, 7, 16}
2600 };
2602 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2632 /* Return true if a red-zone is in use. */
2634 static inline bool
2635 ix86_using_red_zone (void)
2636 {
2637 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2638 }
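/* Editor's note -- illustrative sketch, not GCC source: the 64-bit
   SysV red zone is the 128 bytes below the stack pointer that leaf
   code may use without adjusting %rsp; the MS ABI defines no such
   zone, hence the exclusion above.  Hypothetical helper showing the
   kind of decision this predicate feeds, kept out of the build.  */
#if 0
static int
example_leaf_frame_needs_no_sub (int frame_size)
{
  return ix86_using_red_zone () && frame_size <= 128;
}
#endif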
2640 /* Return a string that documents the current -m options. The caller is
2641 responsible for freeing the string. */
2643 static char *
2644 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2645 const char *tune, enum fpmath_unit fpmath,
2646 bool add_nl_p)
2647 {
2648 struct ix86_target_opts
2649 {
2650 const char *option; /* option string */
2651 HOST_WIDE_INT mask; /* isa mask options */
2652 };
2654 /* This table is ordered so that options like -msse4.2 that imply
2655 preceding options are matched first. */
2656 static struct ix86_target_opts isa_opts[] =
2657 {
2658 { "-m64", OPTION_MASK_ISA_64BIT },
2659 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2660 { "-mfma", OPTION_MASK_ISA_FMA },
2661 { "-mxop", OPTION_MASK_ISA_XOP },
2662 { "-mlwp", OPTION_MASK_ISA_LWP },
2663 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2664 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2665 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2666 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2667 { "-msse3", OPTION_MASK_ISA_SSE3 },
2668 { "-msse2", OPTION_MASK_ISA_SSE2 },
2669 { "-msse", OPTION_MASK_ISA_SSE },
2670 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2671 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2672 { "-mmmx", OPTION_MASK_ISA_MMX },
2673 { "-mabm", OPTION_MASK_ISA_ABM },
2674 { "-mbmi", OPTION_MASK_ISA_BMI },
2675 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2676 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2677 { "-mtbm", OPTION_MASK_ISA_TBM },
2678 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2679 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2680 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2681 { "-maes", OPTION_MASK_ISA_AES },
2682 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2683 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2684 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2685 { "-mf16c", OPTION_MASK_ISA_F16C },
2686 };
2688 /* Flag options. */
2689 static struct ix86_target_opts flag_opts[] =
2690 {
2691 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2692 { "-m80387", MASK_80387 },
2693 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2694 { "-malign-double", MASK_ALIGN_DOUBLE },
2695 { "-mcld", MASK_CLD },
2696 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2697 { "-mieee-fp", MASK_IEEE_FP },
2698 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2699 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2700 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2701 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2702 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2703 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2704 { "-mno-red-zone", MASK_NO_RED_ZONE },
2705 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2706 { "-mrecip", MASK_RECIP },
2707 { "-mrtd", MASK_RTD },
2708 { "-msseregparm", MASK_SSEREGPARM },
2709 { "-mstack-arg-probe", MASK_STACK_PROBE },
2710 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2711 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2712 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2713 { "-mvzeroupper", MASK_VZEROUPPER },
2714 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2715 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2716 { "-mprefer-avx128", MASK_PREFER_AVX128},
2717 };
2719 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2721 char isa_other[40];
2722 char target_other[40];
2723 unsigned num = 0;
2724 unsigned i, j;
2725 char *ret;
2726 char *ptr;
2727 size_t len;
2728 size_t line_len;
2729 size_t sep_len;
2731 memset (opts, '\0', sizeof (opts));
2733 /* Add -march= option. */
2734 if (arch)
2735 {
2736 opts[num][0] = "-march=";
2737 opts[num++][1] = arch;
2738 }
2740 /* Add -mtune= option. */
2741 if (tune)
2742 {
2743 opts[num][0] = "-mtune=";
2744 opts[num++][1] = tune;
2745 }
2747 /* Pick out the options in isa options. */
2748 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2749 {
2750 if ((isa & isa_opts[i].mask) != 0)
2751 {
2752 opts[num++][0] = isa_opts[i].option;
2753 isa &= ~ isa_opts[i].mask;
2754 }
2755 }
2757 if (isa && add_nl_p)
2758 {
2759 opts[num++][0] = isa_other;
2760 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2761 isa);
2762 }
2764 /* Add flag options. */
2765 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2766 {
2767 if ((flags & flag_opts[i].mask) != 0)
2768 {
2769 opts[num++][0] = flag_opts[i].option;
2770 flags &= ~ flag_opts[i].mask;
2771 }
2772 }
2774 if (flags && add_nl_p)
2775 {
2776 opts[num++][0] = target_other;
2777 sprintf (target_other, "(other flags: %#x)", flags);
2778 }
2780 /* Add -mfpmath= option. */
2781 if (fpmath)
2782 {
2783 opts[num][0] = "-mfpmath=";
2784 switch ((int) fpmath)
2785 {
2786 case FPMATH_387:
2787 opts[num++][1] = "387";
2788 break;
2790 case FPMATH_SSE:
2791 opts[num++][1] = "sse";
2792 break;
2794 case FPMATH_387 | FPMATH_SSE:
2795 opts[num++][1] = "sse+387";
2796 break;
2798 default:
2799 gcc_unreachable ();
2800 }
2801 }
2807 gcc_assert (num < ARRAY_SIZE (opts));
2809 /* Size the string. */
2810 len = 0;
2811 sep_len = (add_nl_p) ? 3 : 1;
2812 for (i = 0; i < num; i++)
2813 {
2814 len += sep_len;
2815 for (j = 0; j < 2; j++)
2816 if (opts[i][j])
2817 len += strlen (opts[i][j]);
2818 }
2820 /* Build the string. */
2821 ret = ptr = (char *) xmalloc (len);
2822 line_len = 0;
2824 for (i = 0; i < num; i++)
2825 {
2826 size_t len2[2];
2828 for (j = 0; j < 2; j++)
2829 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2831 if (i != 0)
2832 {
2833 *ptr++ = ' ';
2834 line_len++;
2836 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2837 {
2838 *ptr++ = '\n';
2839 line_len = 0;
2840 }
2841 }
2844 for (j = 0; j < 2; j++)
2845 if (opts[i][j])
2846 {
2847 memcpy (ptr, opts[i][j], len2[j]);
2848 ptr += len2[j];
2849 line_len += len2[j];
2850 }
2851 }
2853 *ptr = '\0';
2854 gcc_assert (ret + len >= ptr);
2856 return ret;
2857 }
2859 /* Return true if profiling code should be emitted before the
2860 prologue; otherwise return false.
2861 Note: For x86 with "hotfix", a sorry () diagnostic is issued. */
2862 static bool
2863 ix86_profile_before_prologue (void)
2864 {
2865 return flag_fentry != 0;
2866 }
2868 /* Function that is callable from the debugger to print the current
2869 options. */
2870 void
2871 ix86_debug_options (void)
2872 {
2873 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2874 ix86_arch_string, ix86_tune_string,
2875 ix86_fpmath, true);
2877 if (opts)
2878 {
2879 fprintf (stderr, "%s\n\n", opts);
2880 free (opts);
2881 }
2882 else
2883 fputs ("<no options>\n\n", stderr);
2885 return;
2886 }
2888 /* Override various settings based on options. If MAIN_ARGS_P, the
2889 options are from the command line, otherwise they are from
2890 attributes. */
2892 static void
2893 ix86_option_override_internal (bool main_args_p)
2894 {
2895 int i;
2896 unsigned int ix86_arch_mask, ix86_tune_mask;
2897 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2898 const char *prefix;
2899 const char *suffix;
2900 const char *sw;
2902 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2903 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2904 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2905 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2906 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2907 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2908 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2909 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2910 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2911 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2912 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2913 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2914 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2915 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2916 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2917 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2918 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2919 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2920 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2921 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2922 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2923 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2924 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2925 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2926 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2927 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2928 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2929 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2930 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2931 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2932 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2933 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2934 /* if this reaches 64, need to widen struct pta flags below */
2936 static struct pta
2937 {
2938 const char *const name; /* processor name or nickname. */
2939 const enum processor_type processor;
2940 const enum attr_cpu schedule;
2941 const unsigned HOST_WIDE_INT flags;
2942 }
2943 const processor_alias_table[] =
2944 {
2945 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2946 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2947 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2948 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2949 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2950 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2951 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2952 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2953 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2954 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2955 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2956 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2957 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2959 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2961 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2962 PTA_MMX | PTA_SSE | PTA_SSE2},
2963 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2964 PTA_MMX | PTA_SSE | PTA_SSE2},
2965 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2966 PTA_MMX | PTA_SSE | PTA_SSE2},
2967 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2968 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2969 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2970 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2971 | PTA_CX16 | PTA_NO_SAHF},
2972 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2973 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2974 | PTA_SSSE3 | PTA_CX16},
2975 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2976 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2977 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2978 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2979 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2980 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2981 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2982 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
2983 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2984 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2985 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2986 | PTA_RDRND | PTA_F16C},
2987 {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
2988 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2989 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
2990 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2991 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
2992 | PTA_FMA | PTA_MOVBE},
2993 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2994 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2995 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2996 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2997 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2998 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2999 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3000 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3001 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3002 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3003 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3004 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3005 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3006 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3007 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3008 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3009 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3010 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3011 {"x86-64", PROCESSOR_K8, CPU_K8,
3012 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3013 {"k8", PROCESSOR_K8, CPU_K8,
3014 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3015 | PTA_SSE2 | PTA_NO_SAHF},
3016 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3017 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3018 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3019 {"opteron", PROCESSOR_K8, CPU_K8,
3020 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3021 | PTA_SSE2 | PTA_NO_SAHF},
3022 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3023 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3024 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3025 {"athlon64", PROCESSOR_K8, CPU_K8,
3026 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3027 | PTA_SSE2 | PTA_NO_SAHF},
3028 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3029 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3030 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3031 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3032 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3033 | PTA_SSE2 | PTA_NO_SAHF},
3034 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3035 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3036 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3037 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3038 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3039 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3040 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3041 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3042 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3043 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3044 | PTA_XOP | PTA_LWP},
3045 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3046 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3047 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3048 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3049 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3050 | PTA_FMA},
3051 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3052 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3053 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3054 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3055 0 /* flags are only used for -march switch. */ },
3056 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3057 PTA_64BIT /* flags are only used for -march switch. */ },
3058 };
3060 /* -mrecip options. */
3061 static struct
3062 {
3063 const char *string; /* option name */
3064 unsigned int mask; /* mask bits to set */
3065 }
3066 const recip_options[] =
3067 {
3068 { "all", RECIP_MASK_ALL },
3069 { "none", RECIP_MASK_NONE },
3070 { "div", RECIP_MASK_DIV },
3071 { "sqrt", RECIP_MASK_SQRT },
3072 { "vec-div", RECIP_MASK_VEC_DIV },
3073 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3076 int const pta_size = ARRAY_SIZE (processor_alias_table);
3078 /* Set up prefix/suffix so the error messages refer to either the command
3079 line argument, or the attribute(target). */
3080 if (main_args_p)
3081 {
3082 prefix = "-m";
3083 suffix = "";
3084 sw = "switch";
3085 }
3086 else
3087 {
3088 prefix = "option(\"";
3089 suffix = "\")";
3090 sw = "attribute";
3091 }
3093 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3094 SUBTARGET_OVERRIDE_OPTIONS;
3095 #endif
3097 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3098 SUBSUBTARGET_OVERRIDE_OPTIONS;
3099 #endif
3101 if (TARGET_X32)
3102 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3104 /* -fPIC is the default for x86_64. */
3105 if (TARGET_MACHO && TARGET_64BIT)
3106 flag_pic = 2;
3108 /* Need to check -mtune=generic first. */
3109 if (ix86_tune_string)
3110 {
3111 if (!strcmp (ix86_tune_string, "generic")
3112 || !strcmp (ix86_tune_string, "i686")
3113 /* As special support for cross compilers we read -mtune=native
3114 as -mtune=generic. With native compilers we won't see the
3115 -mtune=native, as it was changed by the driver. */
3116 || !strcmp (ix86_tune_string, "native"))
3117 {
3118 if (TARGET_64BIT)
3119 ix86_tune_string = "generic64";
3120 else
3121 ix86_tune_string = "generic32";
3122 }
3123 /* If this call is for setting the option attribute, allow the
3124 generic32/generic64 that was previously set. */
3125 else if (!main_args_p
3126 && (!strcmp (ix86_tune_string, "generic32")
3127 || !strcmp (ix86_tune_string, "generic64")))
3128 ;
3129 else if (!strncmp (ix86_tune_string, "generic", 7))
3130 error ("bad value (%s) for %stune=%s %s",
3131 ix86_tune_string, prefix, suffix, sw);
3132 else if (!strcmp (ix86_tune_string, "x86-64"))
3133 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3134 "%stune=k8%s or %stune=generic%s instead as appropriate",
3135 prefix, suffix, prefix, suffix, prefix, suffix);
3136 }
3137 else
3138 {
3139 if (ix86_arch_string)
3140 ix86_tune_string = ix86_arch_string;
3141 if (!ix86_tune_string)
3142 {
3143 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3144 ix86_tune_defaulted = 1;
3145 }
3147 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3148 need to use a sensible tune option. */
3149 if (!strcmp (ix86_tune_string, "generic")
3150 || !strcmp (ix86_tune_string, "x86-64")
3151 || !strcmp (ix86_tune_string, "i686"))
3152 {
3153 if (TARGET_64BIT)
3154 ix86_tune_string = "generic64";
3155 else
3156 ix86_tune_string = "generic32";
3157 }
3158 }
3160 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3161 {
3162 /* rep; movq isn't available in 32-bit code. */
3163 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3164 ix86_stringop_alg = no_stringop;
3165 }
3167 if (!ix86_arch_string)
3168 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3169 else
3170 ix86_arch_specified = 1;
3172 if (!global_options_set.x_ix86_abi)
3173 ix86_abi = DEFAULT_ABI;
3175 if (global_options_set.x_ix86_cmodel)
3177 switch (ix86_cmodel)
3178 {
3179 case CM_SMALL:
3180 case CM_SMALL_PIC:
3181 if (flag_pic)
3182 ix86_cmodel = CM_SMALL_PIC;
3183 if (!TARGET_64BIT)
3184 error ("code model %qs not supported in the %s bit mode",
3185 "small", "32");
3186 break;
3188 case CM_MEDIUM:
3189 case CM_MEDIUM_PIC:
3190 if (flag_pic)
3191 ix86_cmodel = CM_MEDIUM_PIC;
3192 if (!TARGET_64BIT)
3193 error ("code model %qs not supported in the %s bit mode",
3194 "medium", "32");
3195 else if (TARGET_X32)
3196 error ("code model %qs not supported in x32 mode",
3197 "medium");
3198 break;
3200 case CM_LARGE:
3201 case CM_LARGE_PIC:
3202 if (flag_pic)
3203 ix86_cmodel = CM_LARGE_PIC;
3204 if (!TARGET_64BIT)
3205 error ("code model %qs not supported in the %s bit mode",
3206 "large", "32");
3207 else if (TARGET_X32)
3208 error ("code model %qs not supported in x32 mode",
3209 "large");
3210 break;
3212 case CM_32:
3213 if (flag_pic)
3214 error ("code model %s does not support PIC mode", "32");
3215 if (TARGET_64BIT)
3216 error ("code model %qs not supported in the %s bit mode",
3217 "32", "64");
3218 break;
3220 case CM_KERNEL:
3221 if (flag_pic)
3222 {
3223 error ("code model %s does not support PIC mode", "kernel");
3224 ix86_cmodel = CM_32;
3225 }
3226 if (!TARGET_64BIT)
3227 error ("code model %qs not supported in the %s bit mode",
3228 "kernel", "32");
3229 break;
3231 default:
3232 gcc_unreachable ();
3233 }
3235 else
3236 {
3237 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3238 use of rip-relative addressing. This eliminates fixups that
3239 would otherwise be needed if this object is to be placed in a
3240 DLL, and is essentially just as efficient as direct addressing. */
3241 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3242 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3243 else if (TARGET_64BIT)
3244 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3245 else
3246 ix86_cmodel = CM_32;
3247 }
3248 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3249 {
3250 error ("-masm=intel not supported in this configuration");
3251 ix86_asm_dialect = ASM_ATT;
3252 }
3253 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3254 sorry ("%i-bit mode not compiled in",
3255 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3257 for (i = 0; i < pta_size; i++)
3258 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3259 {
3260 ix86_schedule = processor_alias_table[i].schedule;
3261 ix86_arch = processor_alias_table[i].processor;
3262 /* Default cpu tuning to the architecture. */
3263 ix86_tune = ix86_arch;
3265 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3266 error ("CPU you selected does not support x86-64 "
3267 "instruction set");
3269 if (processor_alias_table[i].flags & PTA_MMX
3270 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3271 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3272 if (processor_alias_table[i].flags & PTA_3DNOW
3273 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3274 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3275 if (processor_alias_table[i].flags & PTA_3DNOW_A
3276 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3277 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3278 if (processor_alias_table[i].flags & PTA_SSE
3279 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3280 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3281 if (processor_alias_table[i].flags & PTA_SSE2
3282 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3283 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3284 if (processor_alias_table[i].flags & PTA_SSE3
3285 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3286 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3287 if (processor_alias_table[i].flags & PTA_SSSE3
3288 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3289 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3290 if (processor_alias_table[i].flags & PTA_SSE4_1
3291 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3292 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3293 if (processor_alias_table[i].flags & PTA_SSE4_2
3294 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3295 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3296 if (processor_alias_table[i].flags & PTA_AVX
3297 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3298 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3299 if (processor_alias_table[i].flags & PTA_AVX2
3300 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3301 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3302 if (processor_alias_table[i].flags & PTA_FMA
3303 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3304 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3305 if (processor_alias_table[i].flags & PTA_SSE4A
3306 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3307 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3308 if (processor_alias_table[i].flags & PTA_FMA4
3309 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3310 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3311 if (processor_alias_table[i].flags & PTA_XOP
3312 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3313 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3314 if (processor_alias_table[i].flags & PTA_LWP
3315 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3316 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3317 if (processor_alias_table[i].flags & PTA_ABM
3318 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3319 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3320 if (processor_alias_table[i].flags & PTA_BMI
3321 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3322 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3323 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3324 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3325 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3326 if (processor_alias_table[i].flags & PTA_TBM
3327 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3328 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3329 if (processor_alias_table[i].flags & PTA_BMI2
3330 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3331 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3332 if (processor_alias_table[i].flags & PTA_CX16
3333 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3334 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3335 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3336 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3337 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3338 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3339 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3340 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3341 if (processor_alias_table[i].flags & PTA_MOVBE
3342 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3343 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3344 if (processor_alias_table[i].flags & PTA_AES
3345 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3346 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3347 if (processor_alias_table[i].flags & PTA_PCLMUL
3348 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3349 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3350 if (processor_alias_table[i].flags & PTA_FSGSBASE
3351 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3352 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3353 if (processor_alias_table[i].flags & PTA_RDRND
3354 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3355 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3356 if (processor_alias_table[i].flags & PTA_F16C
3357 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3358 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3359 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3360 x86_prefetch_sse = true;
3362 break;
3363 }
if (!strcmp (ix86_arch_string, "generic"))
  error ("generic CPU can be used only for %stune=%s %s",
	 prefix, suffix, sw);
else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
  error ("bad value (%s) for %sarch=%s %s",
	 ix86_arch_string, prefix, suffix, sw);

ix86_arch_mask = 1u << ix86_arch;
for (i = 0; i < X86_ARCH_LAST; ++i)
  ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
for (i = 0; i < pta_size; i++)
  if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
    {
      ix86_schedule = processor_alias_table[i].schedule;
      ix86_tune = processor_alias_table[i].processor;
      if (TARGET_64BIT)
	{
	  if (!(processor_alias_table[i].flags & PTA_64BIT))
	    {
	      if (ix86_tune_defaulted)
		{
		  ix86_tune_string = "x86-64";
		  for (i = 0; i < pta_size; i++)
		    if (! strcmp (ix86_tune_string,
				  processor_alias_table[i].name))
		      break;
		  ix86_schedule = processor_alias_table[i].schedule;
		  ix86_tune = processor_alias_table[i].processor;
		}
	      else
		error ("CPU you selected does not support x86-64 "
		       "instruction set");
	    }
	}
      else
	{
	  /* Adjust tuning when compiling for the 32-bit ABI.  */
	  switch (ix86_tune)
	    {
	    case PROCESSOR_GENERIC64:
	      ix86_tune = PROCESSOR_GENERIC32;
	      ix86_schedule = CPU_PENTIUMPRO;
	      break;

	    case PROCESSOR_CORE2_64:
	      ix86_tune = PROCESSOR_CORE2_32;
	      break;

	    case PROCESSOR_COREI7_64:
	      ix86_tune = PROCESSOR_COREI7_32;
	      break;

	    default:
	      break;
	    }
	}

      /* Intel CPUs have always interpreted SSE prefetch instructions as
	 NOPs; so, we can enable SSE prefetch instructions even when
	 -mtune (rather than -march) points us to a processor that has them.
	 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	 higher processors.  */
      if (TARGET_CMOVE
	  && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	x86_prefetch_sse = true;

      break;
    }
if (ix86_tune_specified && i == pta_size)
  error ("bad value (%s) for %stune=%s %s",
	 ix86_tune_string, prefix, suffix, sw);

ix86_tune_mask = 1u << ix86_tune;
for (i = 0; i < X86_TUNE_LAST; ++i)
  ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif

/* Set the default values for switches whose default depends on TARGET_64BIT
   in case they weren't overridden by command-line options.  */
if (TARGET_64BIT)
  {
    if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
      flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
    if (flag_asynchronous_unwind_tables == 2)
      flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
    if (flag_pcc_struct_return == 2)
      flag_pcc_struct_return = 0;
  }
else
  {
    if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
      flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
    if (flag_asynchronous_unwind_tables == 2)
      flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
    if (flag_pcc_struct_return == 2)
      flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
  }
if (optimize_size)
  ix86_cost = &ix86_size_cost;
else
  ix86_cost = processor_target_table[ix86_tune].cost;

/* Arrange to set up i386_stack_locals for all functions.  */
init_machine_status = ix86_init_machine_status;

/* Validate -mregparm= value.  */
if (global_options_set.x_ix86_regparm)
  {
    if (TARGET_64BIT)
      warning (0, "-mregparm is ignored in 64-bit mode");
    if (ix86_regparm > REGPARM_MAX)
      {
	error ("-mregparm=%d is not between 0 and %d",
	       ix86_regparm, REGPARM_MAX);
	ix86_regparm = 0;
      }
  }
if (TARGET_64BIT)
  ix86_regparm = REGPARM_MAX;
/* Default align_* from the processor table.  */
if (align_loops == 0)
  {
    align_loops = processor_target_table[ix86_tune].align_loop;
    align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
  }
if (align_jumps == 0)
  {
    align_jumps = processor_target_table[ix86_tune].align_jump;
    align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
  }
if (align_functions == 0)
  align_functions = processor_target_table[ix86_tune].align_func;

/* Provide default for -mbranch-cost= value.  */
if (!global_options_set.x_ix86_branch_cost)
  ix86_branch_cost = ix86_cost->branch_cost;
if (TARGET_64BIT)
  {
    target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

    /* Enable by default the SSE and MMX builtins.  Do allow the user to
       explicitly disable any of these.  In particular, disabling SSE and
       MMX for kernel code is extremely useful.  */
    if (!ix86_arch_specified)
      ix86_isa_flags
	|= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	     | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

    if (TARGET_RTD)
      warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
  }
else
  {
    target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

    if (!ix86_arch_specified)
      ix86_isa_flags
	|= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;

    /* The i386 ABI does not specify a red zone.  It still makes sense to
       use one when the programmer takes care to keep the stack from being
       destroyed.  */
    if (!(target_flags_explicit & MASK_NO_RED_ZONE))
      target_flags |= MASK_NO_RED_ZONE;
  }
/* Keep nonleaf frame pointers.  */
if (flag_omit_frame_pointer)
  target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
else if (TARGET_OMIT_LEAF_FRAME_POINTER)
  flag_omit_frame_pointer = 1;

/* If we're doing fast math, we don't care about comparison order
   wrt NaNs.  This lets us use a shorter comparison sequence.  */
if (flag_finite_math_only)
  target_flags &= ~MASK_IEEE_FP;

/* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
   since the insns won't need emulation.  */
if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
  target_flags &= ~MASK_NO_FANCY_MATH_387;

/* Likewise, if the target doesn't have a 387, or we've specified
   software floating point, don't use 387 inline intrinsics.  */
if (!TARGET_80387)
  target_flags |= MASK_NO_FANCY_MATH_387;
/* Turn on MMX builtins for -msse.  */
if (TARGET_SSE)
  {
    ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
    x86_prefetch_sse = true;
  }

/* Turn on popcnt instruction for -msse4.2 or -mabm.  */
if (TARGET_SSE4_2 || TARGET_ABM)
  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;

/* Turn on lzcnt instruction for -mabm.  */
if (TARGET_ABM)
  ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
/* Validate -mpreferred-stack-boundary= value or default it to
   PREFERRED_STACK_BOUNDARY_DEFAULT.  */
ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
if (global_options_set.x_ix86_preferred_stack_boundary_arg)
  {
    int min = (TARGET_64BIT ? 4 : 2);
    int max = (TARGET_SEH ? 4 : 12);

    if (ix86_preferred_stack_boundary_arg < min
	|| ix86_preferred_stack_boundary_arg > max)
      {
	if (min == max)
	  error ("-mpreferred-stack-boundary is not supported "
		 "for this target");
	else
	  error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		 ix86_preferred_stack_boundary_arg, min, max);
      }
    else
      ix86_preferred_stack_boundary
	= (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
  }
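
/* Worked example (hypothetical flag value, not from this translation
   unit): "-mpreferred-stack-boundary=4" passes the range check above
   and yields (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the
   usual 16-byte stack alignment required by the SSE ABI.  */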
/* Set the default value for -mstackrealign.  */
if (ix86_force_align_arg_pointer == -1)
  ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

/* Validate -mincoming-stack-boundary= value or default it to
   MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
if (global_options_set.x_ix86_incoming_stack_boundary_arg)
  {
    if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
	|| ix86_incoming_stack_boundary_arg > 12)
      error ("-mincoming-stack-boundary=%d is not between %d and 12",
	     ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
    else
      {
	ix86_user_incoming_stack_boundary
	  = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	ix86_incoming_stack_boundary
	  = ix86_user_incoming_stack_boundary;
      }
  }
/* Accept -msseregparm only if at least SSE support is enabled.  */
if (TARGET_SSEREGPARM
    && ! TARGET_SSE)
  error ("%ssseregparm%s used without SSE enabled", prefix, suffix);

if (global_options_set.x_ix86_fpmath)
  {
    if (ix86_fpmath & FPMATH_SSE)
      {
	if (!TARGET_SSE)
	  {
	    warning (0, "SSE instruction set disabled, using 387 arithmetics");
	    ix86_fpmath = FPMATH_387;
	  }
	else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
	  {
	    warning (0, "387 instruction set disabled, using SSE arithmetics");
	    ix86_fpmath = FPMATH_SSE;
	  }
      }
  }
else
  ix86_fpmath = TARGET_FPMATH_DEFAULT;

/* If the i387 is disabled, then do not return values in it.  */
if (!TARGET_80387)
  target_flags &= ~MASK_FLOAT_RETURNS;
/* Use external vectorized library in vectorizing intrinsics.  */
if (global_options_set.x_ix86_veclibabi_type)
  switch (ix86_veclibabi_type)
    {
    case ix86_veclibabi_type_svml:
      ix86_veclib_handler = ix86_veclibabi_svml;
      break;

    case ix86_veclibabi_type_acml:
      ix86_veclib_handler = ix86_veclibabi_acml;
      break;
    }
if ((!USE_IX86_FRAME_POINTER
     || (x86_accumulate_outgoing_args & ix86_tune_mask))
    && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
    && !optimize_size)
  target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

/* ??? Unwind info is not correct around the CFG unless either a frame
   pointer is present or M_A_O_A is set.  Fixing this requires rewriting
   unwind info generation to be aware of the CFG and propagating states
   around edges.  */
if ((flag_unwind_tables || flag_asynchronous_unwind_tables
     || flag_exceptions || flag_non_call_exceptions)
    && flag_omit_frame_pointer
    && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
  {
    if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      warning (0, "unwind tables currently require either a frame pointer "
	       "or %saccumulate-outgoing-args%s for correctness",
	       prefix, suffix);
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  }

/* If stack probes are required, the space used for large function
   arguments on the stack must also be probed, so enable
   -maccumulate-outgoing-args so this happens in the prologue.  */
if (TARGET_STACK_PROBE
    && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
  {
    if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      warning (0, "stack probing requires %saccumulate-outgoing-args%s "
	       "for correctness", prefix, suffix);
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  }
/* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
{
  char *p;
  ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
  p = strchr (internal_label_prefix, 'X');
  internal_label_prefix_len = p - internal_label_prefix;
  *p = '\0';
}

/* When the scheduling description is not available, disable the scheduler
   pass so it won't slow down the compilation and make x87 code slower.  */
if (!TARGET_SCHEDULE)
  flag_schedule_insns_after_reload = flag_schedule_insns = 0;
maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
		       ix86_cost->simultaneous_prefetches,
		       global_options.x_param_values,
		       global_options_set.x_param_values);
maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
		       global_options.x_param_values,
		       global_options_set.x_param_values);
maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
		       global_options.x_param_values,
		       global_options_set.x_param_values);
maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
		       global_options.x_param_values,
		       global_options_set.x_param_values);

/* Enable software prefetching at -O3 for CPUs where prefetching is
   helpful.  */
if (flag_prefetch_loop_arrays < 0
    && HAVE_prefetch
    && optimize >= 3
    && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
  flag_prefetch_loop_arrays = 1;

/* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
   can be optimized to ap = __builtin_next_arg (0).  */
if (!TARGET_64BIT && !flag_split_stack)
  targetm.expand_builtin_va_start = NULL;
if (TARGET_64BIT)
  {
    ix86_gen_leave = gen_leave_rex64;
    ix86_gen_add3 = gen_adddi3;
    ix86_gen_sub3 = gen_subdi3;
    ix86_gen_sub3_carry = gen_subdi3_carry;
    ix86_gen_one_cmpl2 = gen_one_cmpldi2;
    ix86_gen_monitor = gen_sse3_monitor64;
    ix86_gen_andsp = gen_anddi3;
    ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
    ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
    ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
  }
else
  {
    ix86_gen_leave = gen_leave;
    ix86_gen_add3 = gen_addsi3;
    ix86_gen_sub3 = gen_subsi3;
    ix86_gen_sub3_carry = gen_subsi3_carry;
    ix86_gen_one_cmpl2 = gen_one_cmplsi2;
    ix86_gen_monitor = gen_sse3_monitor;
    ix86_gen_andsp = gen_andsi3;
    ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
    ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
    ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
  }

#ifdef USE_IX86_CLD
/* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
if (!TARGET_64BIT)
  target_flags |= MASK_CLD & ~target_flags_explicit;
#endif
if (!TARGET_64BIT && flag_pic)
  {
    if (flag_fentry > 0)
      sorry ("-mfentry isn%'t supported for 32-bit in combination "
	     "with -fpic");
  }
else if (TARGET_SEH)
  {
    if (flag_fentry == 0)
      sorry ("-mno-fentry isn%'t compatible with SEH");
  }
else if (flag_fentry < 0)
  {
#if defined(PROFILE_BEFORE_PROLOGUE)
    flag_fentry = 1;
#endif
  }
if (TARGET_AVX)
  {
    /* When not optimizing for size, enable the vzeroupper optimization
       for TARGET_AVX with -fexpensive-optimizations and split 32-byte
       AVX unaligned load/store.  */
    if (!optimize_size)
      {
	if (flag_expensive_optimizations
	    && !(target_flags_explicit & MASK_VZEROUPPER))
	  target_flags |= MASK_VZEROUPPER;
	if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
	    && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	  target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
	if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
	    && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	  target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
	/* Enable 128-bit AVX instruction generation
	   for the auto-vectorizer.  */
	if (TARGET_AVX128_OPTIMAL
	    && !(target_flags_explicit & MASK_PREFER_AVX128))
	  target_flags |= MASK_PREFER_AVX128;
      }
  }
else
  {
    /* Disable vzeroupper pass if TARGET_AVX is disabled.  */
    target_flags &= ~MASK_VZEROUPPER;
  }
if (ix86_recip_name)
  {
    char *p = ASTRDUP (ix86_recip_name);
    char *q;
    unsigned int mask, i;
    bool invert;

    while ((q = strtok (p, ",")) != NULL)
      {
	p = NULL;
	if (*q == '!')
	  {
	    invert = true;
	    q++;
	  }
	else
	  invert = false;

	if (!strcmp (q, "default"))
	  mask = RECIP_MASK_ALL;
	else
	  {
	    for (i = 0; i < ARRAY_SIZE (recip_options); i++)
	      if (!strcmp (q, recip_options[i].string))
		{
		  mask = recip_options[i].mask;
		  break;
		}

	    if (i == ARRAY_SIZE (recip_options))
	      {
		error ("unknown option for -mrecip=%s", q);
		invert = false;
		mask = RECIP_MASK_NONE;
	      }
	  }

	recip_mask_explicit |= mask;
	if (invert)
	  recip_mask &= ~mask;
	else
	  recip_mask |= mask;
      }
  }

if (TARGET_RECIP)
  recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
else if (target_flags_explicit & MASK_RECIP)
  recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
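
/* Illustrative example (hypothetical command line): "-mrecip=default,!sqrt"
   first sets every bit via the "default" token, then the '!'-inverted
   "sqrt" token clears its bit again, so scalar square roots keep using
   the precise sequence while the remaining reciprocal approximations
   stay enabled.  Both tokens are recorded in recip_mask_explicit, so a
   later blanket TARGET_RECIP cannot re-enable the cleared bit.  */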
/* Save the initial options in case the user does function specific
   options.  */
target_option_default_node = target_option_current_node
  = build_target_option_node ();

/* Return TRUE if VAL is passed in a register with 256bit AVX modes.  */

static bool
function_pass_avx256_p (const_rtx val)
{
  if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
    return true;

  if (GET_CODE (val) == PARALLEL)
    {
      int i;
      rtx r;

      for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
	{
	  r = XVECEXP (val, 0, i);
	  if (GET_CODE (r) == EXPR_LIST
	      && XEXP (r, 0)
	      && REG_P (XEXP (r, 0))
	      && (GET_MODE (XEXP (r, 0)) == OImode
		  || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
	    return true;
	}
    }

  return false;
}
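
/* For illustration (user-level code, not from this file): given a
   callee declared as

     __m256 f (__m256 x);

   the RTL for both its return value and its argument is a V8SFmode
   register, so function_pass_avx256_p returns true for each, and the
   vzeroupper machinery records that 256bit state crosses the call.  */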
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true);
}
/* Update register usage after having seen the compiler flags.  */

void
ix86_conditional_register_usage (void)
{
  int i;
  unsigned int j;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (fixed_regs[i] > 1)
	fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
      if (call_used_regs[i] > 1)
	call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
    }

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* The 64-bit MS_ABI changes the set of call-used registers.  */
  if (TARGET_64BIT_MS_ABI)
    {
      call_used_regs[SI_REG] = 0;
      call_used_regs[DI_REG] = 0;
      call_used_regs[XMM6_REG] = 0;
      call_used_regs[XMM7_REG] = 0;
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	call_used_regs[i] = 0;
    }

  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
     other call-clobbered regs for 64-bit.  */
  if (TARGET_64BIT)
    {
      CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	    && call_used_regs[i])
	  SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If 32-bit, squash the 64-bit registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	reg_names[i] = "";
    }
}
/* Save the current options.  */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;
  ptr->x_recip_mask_explicit = recip_mask_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options.  */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;
  recip_mask_explicit = ptr->x_recip_mask_explicit;

  /* Recreate the arch feature tests if the arch changed.  */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests.  */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}
/* Print the current options.  */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "", ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "", ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))); take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no, O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;
    size_t len;
    enum ix86_opt_type type;
    int opt;
    int mask;
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("fma",	OPT_mfma),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };
  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    gcc_unreachable ();

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt = N_OPTS, mask = 0;
      bool opt_set_p;
      unsigned i;
      char ch;
      enum ix86_opt_type type = ix86_opt_unknown;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}

      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}
      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (&global_options, &global_options_set,
			      &decoded, input_location);
	}
      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}
      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}
      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (&global_options, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}
    }

  return ret;
}
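
/* Illustrative user-level example (not part of this file): the parser
   above accepts a comma-separated attribute string such as

     int popcount_fast (unsigned x)
       __attribute__ ((target ("sse4.2,no-fancy-math-387")));

   "sse4.2" matches an ix86_opt_isa entry and is routed through
   ix86_handle_option just like -msse4.2 on the command line, while the
   stripped "no-" prefix combines with the IX86_ATTR_NO entry so that
   MASK_NO_FANCY_MATH_387 ends up set in target_flags.  */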
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings,
					     &enum_opts_set))
    return NULL_TREE;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT && TARGET_SSE)
	{
	  ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      global_options_set.x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings.  */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct cl_target_option cur_target;
  bool ret = true;
  tree old_optimize = build_optimization_node ();
  tree new_target, new_optimize;
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options);
  new_target = ix86_valid_target_attribute_tree (args);
  new_optimize = build_optimization_node ();

  if (!new_target)
    ret = false;
  else if (fndecl)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's isa options should be a subset of the caller's, i.e. an
	 SSE4 function can inline an SSE2 function, but an SSE2 function
	 can't inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;
      else if (caller_opts->tune != callee_opts->tune)
	ret = false;
      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;
      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;
      else
	ret = true;
    }

  return ret;
}
4510 static GTY(()) tree ix86_previous_fndecl;
4512 /* Establish appropriate back-end context for processing the function
4513 FNDECL. The argument might be NULL to indicate processing at top
4514 level, outside of any function scope. */
4516 ix86_set_current_function (tree fndecl)
4518 /* Only change the context if the function changes. This hook is called
4519 several times in the course of compiling a function, and we don't want to
4520 slow things down too much or call target_reinit when it isn't safe. */
4521 if (fndecl && fndecl != ix86_previous_fndecl)
4523 tree old_tree = (ix86_previous_fndecl
4524 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4527 tree new_tree = (fndecl
4528 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4531 ix86_previous_fndecl = fndecl;
4532 if (old_tree == new_tree)
4537 cl_target_option_restore (&global_options,
4538 TREE_TARGET_OPTION (new_tree));
4544 struct cl_target_option *def
4545 = TREE_TARGET_OPTION (target_option_current_node);
4547 cl_target_option_restore (&global_options, def);
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
	ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for the best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
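
/* Illustrative example (user-level code and hypothetical build flags):
   compiling

     static char big_buffer[1 << 20];

   with "-mcmodel=medium -mlarge-data-threshold=65536" makes
   ix86_in_large_data_p return true, so the array is categorized as
   SECCAT_BSS above and lands in .lbss instead of the ordinary .bss,
   keeping it out of the 32-bit-addressable small data area.  */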
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for the best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use the .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
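
/* Sketch of the expected output (assuming a 128 KiB object "buf", a
   64 KiB section threshold, and 256-bit alignment); the exact directive
   spelling comes from the fputs/fprintf calls above:

     .largecomm	buf,131072,32

   Objects at or below the threshold fall back to the plain
   COMMON_ASM_OP form instead.  */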
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* The standard thing is just to output a label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Disable sibcall if we need to generate vzeroupper after
	 callee returns.  */
      if (TARGET_VZEROUPPER
	  && cfun->machine->callee_return_avx256_p
	  && !cfun->machine->caller_return_avx256_p)
	return false;
    }
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
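
/* Illustrative user-level example (not from this file): in

     extern int g (int);
     int f (int x) { return g (x); }

   the call to g is in tail position and both functions return int in
   %eax, so the checks above allow emitting "jmp g" in place of a
   call/ret pair.  With -fpic on 32-bit, a non-local g fails the very
   first test and the sibcall optimization is skipped.  */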
/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
   and "sseregparm" calling convention attributes;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
			     tree args,
			     int flags ATTRIBUTE_UNUSED,
			     bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall and thiscall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and regparm attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("regparm and thiscall attributes are not compatible");

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
		   name, REGPARM_MAX);
	  *no_add_attrs = true;
	}

      return NULL_TREE;
    }

  if (TARGET_64BIT)
    {
      /* Do not warn when emulating the MS ABI.  */
      if ((TREE_CODE (*node) != FUNCTION_TYPE
	   && TREE_CODE (*node) != METHOD_TYPE)
	  || ix86_function_type_abi (*node) != MS_ABI)
	warning (OPT_Wattributes, "%qE attribute ignored",
		 name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and stdcall attributes are not compatible");
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and regparm attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and thiscall attributes are not compatible");
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and fastcall attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and thiscall attributes are not compatible");
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("cdecl and thiscall attributes are not compatible");
    }
  else if (is_attribute_p ("thiscall", name))
    {
      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
	warning (OPT_Wattributes, "%qE attribute is used for a non-class method",
		 name);
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and thiscall attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and thiscall attributes are not compatible");
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	error ("cdecl and thiscall attributes are not compatible");
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */

static tree
ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree alt;

  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;

  /* The 64-bit ABI is unchanged for transactional memory.  */
  if (TARGET_64BIT)
    return NULL_TREE;

  /* ??? Is there a better way to validate 32-bit Windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
  else
    {
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
    }
  decl_attributes (node, alt, flags);

  return NULL_TREE;
}
/* This function determines from TYPE the calling convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparm isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
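
/* Illustrative user-level example (not from this file): for

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   ix86_get_callcvt returns IX86_CALLCVT_FASTCALL, so the first two
   integer arguments travel in %ecx and %edx and only c goes on the
   stack.  A plain unattributed prototype yields IX86_CALLCVT_CDECL,
   unless -mrtd promotes non-varargs functions to stdcall as above.  */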
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling the function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = 0; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}
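
/* Illustrative example (user-level code, hypothetical flags): with

     static int scale (int a, int b, int c) { return a * b + c; }

   compiled at -O2, the function is local and its signature may change,
   so the logic above can promote it to an effective regparm(3): all
   three arguments arrive in %eax, %edx and %ecx with nothing pushed
   on the stack.  */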
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling the function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}

static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep the aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
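
/* Worked example (user-level code, not from this file): a 32-bit

     void __attribute__ ((stdcall)) f (int a, int b);

   passes 8 bytes of arguments on the stack and has no varargs, so the
   function above returns 8 and the callee exits with "ret $8"; a cdecl
   variant returns 0 and leaves argument cleanup to each caller.  */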
/* Argument support functions.  */

/* Return true when a register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
	return (regno < REGPARM_MAX
		|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
	return (regno < REGPARM_MAX
		|| (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
		|| (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
	return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
	  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
	return true;
    }

  /* TODO: The function should depend on the current function ABI, but
     builtins.c would need updating then.  Therefore we use the default
     ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}

/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, which depends on
   the ABI in use.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* Returns value SYSV_ABI or MS_ABI, depending on fntype, specifying the
   call abi used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}

static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}

static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (fndecl == NULL)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* Returns value SYSV_ABI or MS_ABI, depending on cfun, specifying the
   call abi used.  */
enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of the call-abi-switching target hook.  The register
   sets specific to FNDECL's ABI are set up here.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
   Avoid expensive re-initialization of init_regs each time we switch
   function context since this is needed only during RTL expansion.  */
static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT &&
      call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
5541 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5542 for a call to a function whose data type is FNTYPE.
5543 For a library call, FNTYPE is 0. */
5546 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5547 tree fntype, /* tree ptr for function decl */
5548 rtx libname, /* SYMBOL_REF of library name or 0 */
5552 struct cgraph_local_info *i;
5555 memset (cum, 0, sizeof (*cum));
5557 /* Initialize for the current callee. */
5560 cfun->machine->callee_pass_avx256_p = false;
5561 cfun->machine->callee_return_avx256_p = false;
5566 i = cgraph_local_info (fndecl);
5567 cum->call_abi = ix86_function_abi (fndecl);
5568 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5573 cum->call_abi = ix86_function_type_abi (fntype);
5575 fnret_type = TREE_TYPE (fntype);
5580 if (TARGET_VZEROUPPER && fnret_type)
5582 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5584 if (function_pass_avx256_p (fnret_value))
5586 /* The return value of this function uses 256bit AVX modes. */
5588 cfun->machine->callee_return_avx256_p = true;
5590 cfun->machine->caller_return_avx256_p = true;
5594 cum->caller = caller;
5596 /* Set up the number of registers to use for passing arguments. */
5598 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5599 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5600 "or subtarget optimization implying it");
5601 cum->nregs = ix86_regparm;
5604 cum->nregs = (cum->call_abi == SYSV_ABI
5605 ? X86_64_REGPARM_MAX
5606 : X86_64_MS_REGPARM_MAX);
5610 cum->sse_nregs = SSE_REGPARM_MAX;
5613 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5614 ? X86_64_SSE_REGPARM_MAX
5615 : X86_64_MS_SSE_REGPARM_MAX);
5619 cum->mmx_nregs = MMX_REGPARM_MAX;
5620 cum->warn_avx = true;
5621 cum->warn_sse = true;
5622 cum->warn_mmx = true;
  /* Because the type might mismatch between the caller and the callee, we
     need to use the actual type of the function for local calls.
     FIXME: cgraph_analyze can be told to actually record whether a function
     uses va_start, so for local functions maybe_vaarg can be made more
     aggressive.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
5630 if (i && i->local && i->can_change_signature)
5631 fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);
5638 /* If there are variable arguments, then we won't pass anything
5639 in registers in 32-bit mode. */
5640 if (stdarg_p (fntype))
5651 /* Use ecx and edx registers if function has fastcall attribute,
5652 else look for regparm information. */
5655 unsigned int ccvt = ix86_get_callcvt (fntype);
5656 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5659 cum->fastcall = 1; /* Same first register as in fastcall. */
5661 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5667 cum->nregs = ix86_function_regparm (fntype, fndecl);
5670 /* Set up the number of SSE registers used for passing SFmode
5671 and DFmode arguments. Warn for mismatching ABI. */
5672 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
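/* Sketch of the 32-bit conventions initialized above (the declarations
   are illustrative, not original text):

     void __attribute__ ((fastcall)) f (int a, int b);
       a is passed in ECX and b in EDX;
     void __attribute__ ((regparm (3))) g (int a, int b, int c);
       a, b and c are passed in EAX, EDX and ECX;

   thiscall reuses the fastcall register numbering but with a single
   register, so only ECX (the `this' pointer) is used.  */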
5676 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5677 But in the case of vector types, it is some vector mode.
5679 When we have only some of our vector isa extensions enabled, then there
5680 are some modes for which vector_mode_supported_p is false. For these
5681 modes, the generic vector support in gcc will choose some non-vector mode
5682 in order to implement the type. By computing the natural mode, we'll
5683 select the proper ABI location for the operand and not depend on whatever
5684 the middle-end decides to do with these vector types.
   The middle-end can't deal with vector types > 16 bytes.  In this
   case, we return the original mode and warn about the ABI change if
   CUM isn't NULL.  */
5690 static enum machine_mode
5691 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5693 enum machine_mode mode = TYPE_MODE (type);
5695 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5697 HOST_WIDE_INT size = int_size_in_bytes (type);
5698 if ((size == 8 || size == 16 || size == 32)
5699 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5700 && TYPE_VECTOR_SUBPARTS (type) > 1)
5702 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5704 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5705 mode = MIN_MODE_VECTOR_FLOAT;
5707 mode = MIN_MODE_VECTOR_INT;
5709 /* Get the mode which has this inner mode and number of units. */
      for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
	    && GET_MODE_INNER (mode) == innermode)
	  break;
      if (size == 32 && !TARGET_AVX)
	{
	  static bool warnedavx;

	  if (cum && cum->warn_avx && !warnedavx)
	    {
	      warnedavx = true;
	      warning (0, "AVX vector argument without AVX "
		       "enabled changes the ABI");
	    }
	  return TYPE_MODE (type);
	}
5739 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5740 this may not agree with the mode that the type system has chosen for the
5741 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5742 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5745 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5750 if (orig_mode != BLKmode)
5751 tmp = gen_rtx_REG (orig_mode, regno);
5754 tmp = gen_rtx_REG (mode, regno);
5755 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5756 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each eightbyte of the incoming
   argument by register class and assign registers accordingly.  */
5766 /* Return the union class of CLASS1 and CLASS2.
5767 See the x86-64 PS ABI for details. */
5769 static enum x86_64_reg_class
5770 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5772 /* Rule #1: If both classes are equal, this is the resulting class. */
5773 if (class1 == class2)
5776 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5778 if (class1 == X86_64_NO_CLASS)
5780 if (class2 == X86_64_NO_CLASS)
5783 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5784 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5785 return X86_64_MEMORY_CLASS;
5787 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5788 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5789 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5790 return X86_64_INTEGERSI_CLASS;
5791 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5792 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5793 return X86_64_INTEGER_CLASS;
5795 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5797 if (class1 == X86_64_X87_CLASS
5798 || class1 == X86_64_X87UP_CLASS
5799 || class1 == X86_64_COMPLEX_X87_CLASS
5800 || class2 == X86_64_X87_CLASS
5801 || class2 == X86_64_X87UP_CLASS
5802 || class2 == X86_64_COMPLEX_X87_CLASS)
5803 return X86_64_MEMORY_CLASS;
5805 /* Rule #6: Otherwise class SSE is used. */
5806 return X86_64_SSE_CLASS;
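/* Worked example of the merge rules (illustrative): for

     union u { int i; float f; };

   both members cover the same eightbyte; the int yields
   X86_64_INTEGERSI_CLASS and the float X86_64_SSESF_CLASS, and rule #4
   merges them to X86_64_INTEGERSI_CLASS, so the union travels in a
   general-purpose register.  */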
5809 /* Classify the argument of type TYPE and mode MODE.
5810 CLASSES will be filled by the register class used to pass each word
5811 of the operand. The number of words is returned. In case the parameter
5812 should be passed in memory, 0 is returned. As a special case for zero
5813 sized containers, classes[0] will be NO_CLASS and 1 is returned.
   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits modulo 256 to avoid overflow cases.
5818 See the x86-64 PS ABI for details.
5822 classify_argument (enum machine_mode mode, const_tree type,
5823 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5825 HOST_WIDE_INT bytes =
5826 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5827 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5829 /* Variable sized entities are always passed/returned in memory. */
5833 if (mode != VOIDmode
5834 && targetm.calls.must_pass_in_stack (mode, type))
5837 if (type && AGGREGATE_TYPE_P (type))
5841 enum x86_64_reg_class subclasses[MAX_CLASSES];
5843 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5847 for (i = 0; i < words; i++)
5848 classes[i] = X86_64_NO_CLASS;
      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal the memory class, so handle this as a special case.  */
5854 classes[0] = X86_64_NO_CLASS;
5858 /* Classify each field of record and merge classes. */
5859 switch (TREE_CODE (type))
	  /* And now merge the fields of the structure.  */
5863 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5865 if (TREE_CODE (field) == FIELD_DECL)
5869 if (TREE_TYPE (field) == error_mark_node)
5872 /* Bitfields are always classified as integer. Handle them
5873 early, since later code would consider them to be
5874 misaligned integers. */
5875 if (DECL_BIT_FIELD (field))
		    for (i = (int_bit_position (field)
			      + (bit_offset % 64)) / 8 / 8;
			 i < ((int_bit_position (field) + (bit_offset % 64))
			      + tree_low_cst (DECL_SIZE (field), 0)
			      + 63) / 8 / 8; i++)
		      classes[i]
			= merge_classes (X86_64_INTEGER_CLASS, classes[i]);
5889 type = TREE_TYPE (field);
	      /* A flexible array member is ignored.  */
5892 if (TYPE_MODE (type) == BLKmode
5893 && TREE_CODE (type) == ARRAY_TYPE
5894 && TYPE_SIZE (type) == NULL_TREE
5895 && TYPE_DOMAIN (type) != NULL_TREE
5896 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5901 if (!warned && warn_psabi)
5904 inform (input_location,
5905 "the ABI of passing struct with"
5906 " a flexible array member has"
5907 " changed in GCC 4.4");
5911 num = classify_argument (TYPE_MODE (type), type,
5913 (int_bit_position (field)
5914 + bit_offset) % 256);
5917 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5918 for (i = 0; i < num && (i + pos) < words; i++)
5920 merge_classes (subclasses[i], classes[i + pos]);
5927 /* Arrays are handled as small records. */
5930 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5931 TREE_TYPE (type), subclasses, bit_offset);
5935 /* The partial classes are now full classes. */
5936 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5937 subclasses[0] = X86_64_SSE_CLASS;
5938 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5939 && !((bit_offset % 64) == 0 && bytes == 4))
5940 subclasses[0] = X86_64_INTEGER_CLASS;
5942 for (i = 0; i < words; i++)
5943 classes[i] = subclasses[i % num];
5948 case QUAL_UNION_TYPE:
      /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
5951 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5953 if (TREE_CODE (field) == FIELD_DECL)
5957 if (TREE_TYPE (field) == error_mark_node)
5960 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5961 TREE_TYPE (field), subclasses,
5965 for (i = 0; i < num; i++)
5966 classes[i] = merge_classes (subclasses[i], classes[i]);
	  /* When size > 16 bytes, if the first eightbyte isn't
	     X86_64_SSE_CLASS or any other eightbyte isn't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
5981 if (classes[0] != X86_64_SSE_CLASS)
5984 for (i = 1; i < words; i++)
5985 if (classes[i] != X86_64_SSEUP_CLASS)
5989 /* Final merger cleanup. */
5990 for (i = 0; i < words; i++)
5992 /* If one class is MEMORY, everything should be passed in
5994 if (classes[i] == X86_64_MEMORY_CLASS)
	  /* X86_64_SSEUP_CLASS should always be preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
5999 if (classes[i] == X86_64_SSEUP_CLASS
6000 && classes[i - 1] != X86_64_SSE_CLASS
6001 && classes[i - 1] != X86_64_SSEUP_CLASS)
6003 /* The first one should never be X86_64_SSEUP_CLASS. */
6004 gcc_assert (i != 0);
6005 classes[i] = X86_64_SSE_CLASS;
6008 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6009 everything should be passed in memory. */
6010 if (classes[i] == X86_64_X87UP_CLASS
6011 && (classes[i - 1] != X86_64_X87_CLASS))
6015 /* The first one should never be X86_64_X87UP_CLASS. */
6016 gcc_assert (i != 0);
6017 if (!warned && warn_psabi)
6020 inform (input_location,
6021 "the ABI of passing union with long double"
6022 " has changed in GCC 4.4");
  /* Compute the alignment needed.  We align all types to natural boundaries,
     with the exception of XFmode, which is aligned to 64 bits.  */
6032 if (mode != VOIDmode && mode != BLKmode)
6034 int mode_alignment = GET_MODE_BITSIZE (mode);
6037 mode_alignment = 128;
6038 else if (mode == XCmode)
6039 mode_alignment = 256;
6040 if (COMPLEX_MODE_P (mode))
6041 mode_alignment /= 2;
6042 /* Misaligned fields are always returned in memory. */
6043 if (bit_offset % mode_alignment)
  /* For V1xx modes, just use the base mode.  */
6048 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6049 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6050 mode = GET_MODE_INNER (mode);
6052 /* Classification of atomic types. */
6057 classes[0] = X86_64_SSE_CLASS;
6060 classes[0] = X86_64_SSE_CLASS;
6061 classes[1] = X86_64_SSEUP_CLASS;
	int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6075 classes[0] = X86_64_INTEGERSI_CLASS;
6078 else if (size <= 64)
6080 classes[0] = X86_64_INTEGER_CLASS;
6083 else if (size <= 64+32)
6085 classes[0] = X86_64_INTEGER_CLASS;
6086 classes[1] = X86_64_INTEGERSI_CLASS;
6089 else if (size <= 64+64)
6091 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6099 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6103 /* OImode shouldn't be used directly. */
6108 if (!(bit_offset % 64))
6109 classes[0] = X86_64_SSESF_CLASS;
6111 classes[0] = X86_64_SSE_CLASS;
6114 classes[0] = X86_64_SSEDF_CLASS;
6117 classes[0] = X86_64_X87_CLASS;
6118 classes[1] = X86_64_X87UP_CLASS;
6121 classes[0] = X86_64_SSE_CLASS;
6122 classes[1] = X86_64_SSEUP_CLASS;
6125 classes[0] = X86_64_SSE_CLASS;
6126 if (!(bit_offset % 64))
6132 if (!warned && warn_psabi)
6135 inform (input_location,
6136 "the ABI of passing structure with complex float"
6137 " member has changed in GCC 4.4");
6139 classes[1] = X86_64_SSESF_CLASS;
6143 classes[0] = X86_64_SSEDF_CLASS;
6144 classes[1] = X86_64_SSEDF_CLASS;
6147 classes[0] = X86_64_COMPLEX_X87_CLASS;
      /* This mode is larger than 16 bytes.  */
6158 classes[0] = X86_64_SSE_CLASS;
6159 classes[1] = X86_64_SSEUP_CLASS;
6160 classes[2] = X86_64_SSEUP_CLASS;
6161 classes[3] = X86_64_SSEUP_CLASS;
6169 classes[0] = X86_64_SSE_CLASS;
6170 classes[1] = X86_64_SSEUP_CLASS;
6178 classes[0] = X86_64_SSE_CLASS;
6184 gcc_assert (VECTOR_MODE_P (mode));
6189 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6191 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6192 classes[0] = X86_64_INTEGERSI_CLASS;
6194 classes[0] = X86_64_INTEGER_CLASS;
6195 classes[1] = X86_64_INTEGER_CLASS;
6196 return 1 + (bytes > 8);
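/* Worked example for classify_argument (illustrative): for

     struct s { double d; long l; };

   the first eightbyte is classified as X86_64_SSEDF_CLASS and the
   second as X86_64_INTEGER_CLASS, so as an argument `d' is assigned to
   the next free SSE register and `l' to the next free integer
   register.  */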
/* Examine the argument and set the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory.  */

static int
6203 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6204 int *int_nregs, int *sse_nregs)
6206 enum x86_64_reg_class regclass[MAX_CLASSES];
6207 int n = classify_argument (mode, type, regclass, 0);
6213 for (n--; n >= 0; n--)
6214 switch (regclass[n])
6216 case X86_64_INTEGER_CLASS:
6217 case X86_64_INTEGERSI_CLASS:
6220 case X86_64_SSE_CLASS:
6221 case X86_64_SSESF_CLASS:
6222 case X86_64_SSEDF_CLASS:
6225 case X86_64_NO_CLASS:
6226 case X86_64_SSEUP_CLASS:
6228 case X86_64_X87_CLASS:
6229 case X86_64_X87UP_CLASS:
6233 case X86_64_COMPLEX_X87_CLASS:
6234 return in_return ? 2 : 0;
6235 case X86_64_MEMORY_CLASS:
6241 /* Construct container for the argument used by GCC interface. See
6242 FUNCTION_ARG for the detailed description. */
6245 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6246 const_tree type, int in_return, int nintregs, int nsseregs,
6247 const int *intreg, int sse_regno)
6249 /* The following variables hold the static issued_error state. */
6250 static bool issued_sse_arg_error;
6251 static bool issued_sse_ret_error;
6252 static bool issued_x87_ret_error;
6254 enum machine_mode tmpmode;
6256 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6257 enum x86_64_reg_class regclass[MAX_CLASSES];
6261 int needed_sseregs, needed_intregs;
6262 rtx exp[MAX_CLASSES];
6265 n = classify_argument (mode, type, regclass, 0);
6268 if (!examine_argument (mode, type, in_return, &needed_intregs,
6271 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6274 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6275 some less clueful developer tries to use floating-point anyway. */
6276 if (needed_sseregs && !TARGET_SSE)
6280 if (!issued_sse_ret_error)
6282 error ("SSE register return with SSE disabled");
6283 issued_sse_ret_error = true;
6286 else if (!issued_sse_arg_error)
6288 error ("SSE register argument with SSE disabled");
6289 issued_sse_arg_error = true;
6294 /* Likewise, error if the ABI requires us to return values in the
6295 x87 registers and the user specified -mno-80387. */
6296 if (!TARGET_80387 && in_return)
6297 for (i = 0; i < n; i++)
6298 if (regclass[i] == X86_64_X87_CLASS
6299 || regclass[i] == X86_64_X87UP_CLASS
6300 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6302 if (!issued_x87_ret_error)
6304 error ("x87 register return with x87 disabled");
6305 issued_x87_ret_error = true;
  /* First construct simple cases.  Avoid SCmode, since we want to use
     a single register to pass this type.  */
6312 if (n == 1 && mode != SCmode)
6313 switch (regclass[0])
6315 case X86_64_INTEGER_CLASS:
6316 case X86_64_INTEGERSI_CLASS:
6317 return gen_rtx_REG (mode, intreg[0]);
6318 case X86_64_SSE_CLASS:
6319 case X86_64_SSESF_CLASS:
6320 case X86_64_SSEDF_CLASS:
6321 if (mode != BLKmode)
6322 return gen_reg_or_parallel (mode, orig_mode,
6323 SSE_REGNO (sse_regno));
6325 case X86_64_X87_CLASS:
6326 case X86_64_COMPLEX_X87_CLASS:
6327 return gen_rtx_REG (mode, FIRST_STACK_REG);
6328 case X86_64_NO_CLASS:
6329 /* Zero sized array, struct or class. */
6334 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6335 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6336 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6338 && regclass[0] == X86_64_SSE_CLASS
6339 && regclass[1] == X86_64_SSEUP_CLASS
6340 && regclass[2] == X86_64_SSEUP_CLASS
6341 && regclass[3] == X86_64_SSEUP_CLASS
6343 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6346 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6347 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6348 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6349 && regclass[1] == X86_64_INTEGER_CLASS
6350 && (mode == CDImode || mode == TImode || mode == TFmode)
6351 && intreg[0] + 1 == intreg[1])
6352 return gen_rtx_REG (mode, intreg[0]);
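/* For instance (illustrative): an unsigned __int128 argument has TImode
   and classifies as two X86_64_INTEGER_CLASS eightbytes; when the two
   assigned GPRs happen to have adjacent hard register numbers, the test
   above returns a single (reg:TI ...) instead of building a
   PARALLEL.  */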
6354 /* Otherwise figure out the entries of the PARALLEL. */
6355 for (i = 0; i < n; i++)
6359 switch (regclass[i])
6361 case X86_64_NO_CLASS:
6363 case X86_64_INTEGER_CLASS:
6364 case X86_64_INTEGERSI_CLASS:
6365 /* Merge TImodes on aligned occasions here too. */
6366 if (i * 8 + 8 > bytes)
6367 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6368 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	  /* We've requested 24 bytes we don't have a mode for.
	     Use DImode.  */
6373 if (tmpmode == BLKmode)
6375 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6376 gen_rtx_REG (tmpmode, *intreg),
6380 case X86_64_SSESF_CLASS:
6381 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6382 gen_rtx_REG (SFmode,
6383 SSE_REGNO (sse_regno)),
6387 case X86_64_SSEDF_CLASS:
6388 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6389 gen_rtx_REG (DFmode,
6390 SSE_REGNO (sse_regno)),
6394 case X86_64_SSE_CLASS:
6402 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6412 && regclass[1] == X86_64_SSEUP_CLASS
6413 && regclass[2] == X86_64_SSEUP_CLASS
6414 && regclass[3] == X86_64_SSEUP_CLASS);
6421 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6422 gen_rtx_REG (tmpmode,
6423 SSE_REGNO (sse_regno)),
6432 /* Empty aligned struct, union or class. */
  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp[i];

  return ret;
}
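/* Shape of the result (a sketch; the register choice is illustrative):
   for struct s { double d; long l; } passed as the first argument, the
   container looks like

     (parallel:BLK [(expr_list (reg:DF xmm0) (const_int 0))
		    (expr_list (reg:DI di) (const_int 8))])

   i.e. each expr_list pairs a hard register with the byte offset of the
   piece it carries.  */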
6442 /* Update the data in CUM to advance over an argument of mode MODE
6443 and data type TYPE. (TYPE is null for libcalls where that information
6444 may not be available.) */
6447 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6448 const_tree type, HOST_WIDE_INT bytes,
6449 HOST_WIDE_INT words)
6465 cum->words += words;
6466 cum->nregs -= words;
6467 cum->regno += words;
6469 if (cum->nregs <= 0)
6477 /* OImode shouldn't be used directly. */
6481 if (cum->float_in_sse < 2)
6484 if (cum->float_in_sse < 1)
6501 if (!type || !AGGREGATE_TYPE_P (type))
6503 cum->sse_words += words;
6504 cum->sse_nregs -= 1;
6505 cum->sse_regno += 1;
6506 if (cum->sse_nregs <= 0)
6520 if (!type || !AGGREGATE_TYPE_P (type))
6522 cum->mmx_words += words;
6523 cum->mmx_nregs -= 1;
6524 cum->mmx_regno += 1;
6525 if (cum->mmx_nregs <= 0)
6536 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6537 const_tree type, HOST_WIDE_INT words, bool named)
6539 int int_nregs, sse_nregs;
  /* Unnamed 256bit vector mode parameters are passed on the stack.  */
6542 if (!named && VALID_AVX256_REG_MODE (mode))
6545 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6546 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6548 cum->nregs -= int_nregs;
6549 cum->sse_nregs -= sse_nregs;
6550 cum->regno += int_nregs;
6551 cum->sse_regno += sse_nregs;
6555 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6556 cum->words = (cum->words + align - 1) & ~(align - 1);
6557 cum->words += words;
6562 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6563 HOST_WIDE_INT words)
  /* Otherwise, this should be passed indirectly.  */
6566 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6568 cum->words += words;
6576 /* Update the data in CUM to advance over an argument of mode MODE and
6577 data type TYPE. (TYPE is null for libcalls where that information
6578 may not be available.) */
6581 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6582 const_tree type, bool named)
6584 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6585 HOST_WIDE_INT bytes, words;
  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
6591 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6594 mode = type_natural_mode (type, NULL);
6596 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6597 function_arg_advance_ms_64 (cum, bytes, words);
6598 else if (TARGET_64BIT)
6599 function_arg_advance_64 (cum, mode, type, words, named);
6601 function_arg_advance_32 (cum, mode, type, bytes, words);
6604 /* Define where to put the arguments to a function.
6605 Value is zero to push the argument on the stack,
6606 or a hard register in which to store the argument.
6608 MODE is the argument's machine mode.
6609 TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may not be
   available.
6612 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6613 the preceding args and about the function being called.
6614 NAMED is nonzero if this argument is a named parameter
6615 (otherwise it is an extra parameter matching an ellipsis). */
6618 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6619 enum machine_mode orig_mode, const_tree type,
6620 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6622 static bool warnedsse, warnedmmx;
6624 /* Avoid the AL settings for the Unix64 ABI. */
6625 if (mode == VOIDmode)
6641 if (words <= cum->nregs)
6643 int regno = cum->regno;
6645 /* Fastcall allocates the first two DWORD (SImode) or
6646 smaller arguments to ECX and EDX if it isn't an
6652 || (type && AGGREGATE_TYPE_P (type)))
	      /* ECX, not EAX, is the first allocated register.  */
6656 if (regno == AX_REG)
6659 return gen_rtx_REG (mode, regno);
6664 if (cum->float_in_sse < 2)
6667 if (cum->float_in_sse < 1)
    /* In 32-bit mode, we pass TImode in xmm registers.  */
6678 if (!type || !AGGREGATE_TYPE_P (type))
6680 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6683 warning (0, "SSE vector argument without SSE enabled "
6687 return gen_reg_or_parallel (mode, orig_mode,
6688 cum->sse_regno + FIRST_SSE_REG);
6693 /* OImode shouldn't be used directly. */
6702 if (!type || !AGGREGATE_TYPE_P (type))
6705 return gen_reg_or_parallel (mode, orig_mode,
6706 cum->sse_regno + FIRST_SSE_REG);
6716 if (!type || !AGGREGATE_TYPE_P (type))
6718 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6721 warning (0, "MMX vector argument without MMX enabled "
6725 return gen_reg_or_parallel (mode, orig_mode,
6726 cum->mmx_regno + FIRST_MMX_REG);
6735 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6736 enum machine_mode orig_mode, const_tree type, bool named)
6738 /* Handle a hidden AL argument containing number of registers
6739 for varargs x86-64 functions. */
6740 if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);
  /* Unnamed 256bit vector mode parameters are passed on the stack.  */
6764 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6766 &x86_64_int_parameter_registers [cum->regno],
6771 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6772 enum machine_mode orig_mode, bool named,
6773 HOST_WIDE_INT bytes)
  /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use a value of -2 to specify that the current function call is
     MS ABI.  */
6779 if (mode == VOIDmode)
6780 return GEN_INT (-2);
6782 /* If we've run out of registers, it goes on the stack. */
6783 if (cum->nregs == 0)
6786 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6788 /* Only floating point modes are passed in anything but integer regs. */
6789 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6792 regno = cum->regno + FIRST_SSE_REG;
6797 /* Unnamed floating parameters are passed in both the
6798 SSE and integer registers. */
6799 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6800 t2 = gen_rtx_REG (mode, regno);
6801 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6802 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6803 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
  /* Handle aggregate types passed in registers.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }
6815 return gen_reg_or_parallel (mode, orig_mode, regno);
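/* Illustrative summary of the Microsoft x64 convention implemented
   above: the first four arguments use one slot each, RCX/RDX/R8/R9 for
   integers and XMM0-XMM3 for floating point, chosen strictly by
   position, so for

     void f (int a, double b, int c, double d);

   a is passed in ECX, b in XMM1, c in R8D and d in XMM3; for unnamed
   floating arguments the PARALLEL built above exposes the value in both
   the SSE and the integer register.  */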
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.
6821 MODE is the argument's machine mode. TYPE is the data type of the
6822 argument. It is null for libcalls where that information may not be
6823 available. CUM gives information about the preceding args and about
6824 the function being called. NAMED is nonzero if this argument is a
6825 named parameter (otherwise it is an extra parameter matching an
6829 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6830 const_tree type, bool named)
6832 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6833 enum machine_mode mode = omode;
6834 HOST_WIDE_INT bytes, words;
  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
6841 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6843 /* To simplify the code below, represent vector types with a vector mode
6844 even if MMX/SSE are not active. */
6845 if (type && TREE_CODE (type) == VECTOR_TYPE)
6846 mode = type_natural_mode (type, cum);
6848 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6849 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6850 else if (TARGET_64BIT)
6851 arg = function_arg_64 (cum, mode, omode, type, named);
6853 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6855 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6857 /* This argument uses 256bit AVX modes. */
6859 cfun->machine->callee_pass_avx256_p = true;
6861 cfun->machine->caller_pass_avx256_p = true;
6867 /* A C expression that indicates when an argument must be passed by
6868 reference. If nonzero for an argument, a copy of that argument is
6869 made in memory and a pointer to the argument is passed instead of
6870 the argument itself. The pointer is passed in whatever way is
6871 appropriate for passing a pointer to that type. */
6874 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6875 enum machine_mode mode ATTRIBUTE_UNUSED,
6876 const_tree type, bool named ATTRIBUTE_UNUSED)
6878 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6880 /* See Windows x64 Software Convention. */
6881 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6883 int msize = (int) GET_MODE_SIZE (mode);
6886 /* Arrays are passed by reference. */
6887 if (TREE_CODE (type) == ARRAY_TYPE)
6890 if (AGGREGATE_TYPE_P (type))
6892 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6893 are passed by reference. */
6894 msize = int_size_in_bytes (type);
6898 /* __m128 is passed by reference. */
6900 case 1: case 2: case 4: case 8:
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
}
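/* Example of the rules above (illustrative): under the Microsoft x64
   ABI an aggregate such as

     struct rgb { unsigned char c[3]; };

   has a size outside {1, 2, 4, 8} and is therefore passed by reference,
   as are all arrays and the 16-byte __m128 type.  */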
6912 /* Return true when TYPE should be 128bit aligned for 32bit argument
6913 passing ABI. XXX: This function is obsolete and is only used for
6914 checking psABI compatibility with previous versions of GCC. */
6917 ix86_compat_aligned_value_p (const_tree type)
6919 enum machine_mode mode = TYPE_MODE (type);
6920 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6924 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6926 if (TYPE_ALIGN (type) < 128)
6929 if (AGGREGATE_TYPE_P (type))
6931 /* Walk the aggregates recursively. */
6932 switch (TREE_CODE (type))
6936 case QUAL_UNION_TYPE:
6940 /* Walk all the structure fields. */
6941 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6943 if (TREE_CODE (field) == FIELD_DECL
6944 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
	  /* Just for use if some language passes arrays by value.  */
6952 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6963 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6964 XXX: This function is obsolete and is only used for checking psABI
6965 compatibility with previous versions of GCC. */
6968 ix86_compat_function_arg_boundary (enum machine_mode mode,
6969 const_tree type, unsigned int align)
6971 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6972 natural boundaries. */
6973 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6975 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6976 make an exception for SSE modes since these require 128bit
6979 The handling here differs from field_alignment. ICC aligns MMX
6980 arguments to 4 byte boundaries, while structure fields are aligned
6981 to 8 byte boundaries. */
6984 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6985 align = PARM_BOUNDARY;
6989 if (!ix86_compat_aligned_value_p (type))
6990 align = PARM_BOUNDARY;
6993 if (align > BIGGEST_ALIGNMENT)
6994 align = BIGGEST_ALIGNMENT;
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
7002 ix86_contains_aligned_value_p (const_tree type)
7004 enum machine_mode mode = TYPE_MODE (type);
7006 if (mode == XFmode || mode == XCmode)
7009 if (TYPE_ALIGN (type) < 128)
7012 if (AGGREGATE_TYPE_P (type))
7014 /* Walk the aggregates recursively. */
7015 switch (TREE_CODE (type))
7019 case QUAL_UNION_TYPE:
7023 /* Walk all the structure fields. */
7024 for (field = TYPE_FIELDS (type);
7026 field = DECL_CHAIN (field))
7028 if (TREE_CODE (field) == FIELD_DECL
7029 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
	  /* Just for use if some language passes arrays by value.  */
7037 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7046 return TYPE_ALIGN (type) >= 128;
7051 /* Gives the alignment boundary, in bits, of an argument with the
7052 specified mode and type. */
7055 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
      /* Since the main variant type is used for calls, convert TYPE to
	 its main variant.  */
7062 type = TYPE_MAIN_VARIANT (type);
7063 align = TYPE_ALIGN (type);
7066 align = GET_MODE_ALIGNMENT (mode);
7067 if (align < PARM_BOUNDARY)
7068 align = PARM_BOUNDARY;
7072 unsigned int saved_align = align;
7076 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7079 if (mode == XFmode || mode == XCmode)
7080 align = PARM_BOUNDARY;
7082 else if (!ix86_contains_aligned_value_p (type))
7083 align = PARM_BOUNDARY;
7086 align = PARM_BOUNDARY;
7091 && align != ix86_compat_function_arg_boundary (mode, type,
7095 inform (input_location,
7096 "The ABI for passing parameters with %d-byte"
7097 " alignment has changed in GCC 4.6",
7098 align / BITS_PER_UNIT);
7105 /* Return true if N is a possible register number of function value. */
7108 ix86_function_value_regno_p (const unsigned int regno)
    case FIRST_FLOAT_REG:
      /* TODO: The function should depend on the current function ABI,
	 but builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_abi == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;
7127 if (TARGET_MACHO || TARGET_64BIT)
7135 /* Define how to find the value returned by a function.
7136 VALTYPE is the data type of the value (as a tree).
7137 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7138 otherwise, FUNC is 0. */
7141 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7142 const_tree fntype, const_tree fn)
7146 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7147 we normally prevent this case when mmx is not available. However
7148 some ABIs may require the result to be returned like DImode. */
7149 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7150 regno = FIRST_MMX_REG;
7152 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7153 we prevent this case when sse is not available. However some ABIs
7154 may require the result to be returned like integer TImode. */
7155 else if (mode == TImode
7156 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7157 regno = FIRST_SSE_REG;
7159 /* 32-byte vector modes in %ymm0. */
7160 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7161 regno = FIRST_SSE_REG;
7163 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7164 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7165 regno = FIRST_FLOAT_REG;
7167 /* Most things go in %eax. */
7170 /* Override FP return register with %xmm0 for local functions when
7171 SSE math is enabled or for functions with sseregparm attribute. */
7172 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7174 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7175 if ((sse_level >= 1 && mode == SFmode)
7176 || (sse_level == 2 && mode == DFmode))
7177 regno = FIRST_SSE_REG;
7180 /* OImode shouldn't be used directly. */
7181 gcc_assert (mode != OImode);
7183 return gen_rtx_REG (orig_mode, regno);
7187 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7192 /* Handle libcalls, which don't provide a type node. */
7193 if (valtype == NULL)
7207 regno = FIRST_SSE_REG;
7211 regno = FIRST_FLOAT_REG;
7219 return gen_rtx_REG (mode, regno);
7221 else if (POINTER_TYPE_P (valtype))
7223 /* Pointers are always returned in Pmode. */
7227 ret = construct_container (mode, orig_mode, valtype, 1,
7228 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7229 x86_64_int_return_registers, 0);
  /* For zero sized structures, construct_container returns NULL, but we
     need to keep the rest of the compiler happy by returning a meaningful
     value.  */
7234 ret = gen_rtx_REG (orig_mode, AX_REG);
7240 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7242 unsigned int regno = AX_REG;
7246 switch (GET_MODE_SIZE (mode))
    {
    case 16:
      if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7250 && !COMPLEX_MODE_P (mode))
7251 regno = FIRST_SSE_REG;
7255 if (mode == SFmode || mode == DFmode)
7256 regno = FIRST_SSE_REG;
7262 return gen_rtx_REG (orig_mode, regno);
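/* Illustrative return-value summary for the MS ABI path above: float
   and double come back in XMM0, a 16-byte vector such as __m128 also in
   XMM0 when SSE is enabled, and anything that fits in 1, 2, 4 or 8
   bytes in RAX.  */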
7266 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7267 enum machine_mode orig_mode, enum machine_mode mode)
7269 const_tree fn, fntype;
7272 if (fntype_or_decl && DECL_P (fntype_or_decl))
7273 fn = fntype_or_decl;
7274 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7276 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7277 return function_value_ms_64 (orig_mode, mode);
7278 else if (TARGET_64BIT)
7279 return function_value_64 (orig_mode, mode, valtype);
7281 return function_value_32 (orig_mode, mode, fntype, fn);
7285 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7286 bool outgoing ATTRIBUTE_UNUSED)
7288 enum machine_mode mode, orig_mode;
7290 orig_mode = TYPE_MODE (valtype);
7291 mode = type_natural_mode (valtype, NULL);
7292 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7295 /* Pointer function arguments and return values are promoted to Pmode. */
7297 static enum machine_mode
7298 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7299 int *punsignedp, const_tree fntype,
7302 if (type != NULL_TREE && POINTER_TYPE_P (type))
7304 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7307 return default_promote_function_mode (type, mode, punsignedp, fntype,
7312 ix86_libcall_value (enum machine_mode mode)
7314 return ix86_function_value_1 (NULL, NULL, mode, mode);
7317 /* Return true iff type is returned in memory. */
7319 static bool ATTRIBUTE_UNUSED
7320 return_in_memory_32 (const_tree type, enum machine_mode mode)
  if (mode == BLKmode)
    return true;

  size = int_size_in_bytes (type);
7329 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7332 if (VECTOR_MODE_P (mode) || mode == TImode)
7334 /* User-created vectors small enough to fit in EAX. */
7338 /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist or the ABI prescribes otherwise.  */
7341 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7343 /* SSE values are returned in XMM0, except when it doesn't exist. */
7347 /* AVX values are returned in YMM0, except when it doesn't exist. */
7358 /* OImode shouldn't be used directly. */
7359 gcc_assert (mode != OImode);
7364 static bool ATTRIBUTE_UNUSED
7365 return_in_memory_64 (const_tree type, enum machine_mode mode)
7367 int needed_intregs, needed_sseregs;
7368 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7371 static bool ATTRIBUTE_UNUSED
7372 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7374 HOST_WIDE_INT size = int_size_in_bytes (type);
7376 /* __m128 is returned in xmm0. */
7377 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7378 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
  /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes.  */
7382 return size != 1 && size != 2 && size != 4 && size != 8;
7386 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7388 #ifdef SUBTARGET_RETURN_IN_MEMORY
7389 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7391 const enum machine_mode mode = type_natural_mode (type, NULL);
7395 if (ix86_function_type_abi (fntype) == MS_ABI)
7396 return return_in_memory_ms_64 (type, mode);
7398 return return_in_memory_64 (type, mode);
7401 return return_in_memory_32 (type, mode);
7405 /* When returning SSE vector types, we have a choice of either
7406 (1) being abi incompatible with a -march switch, or
7407 (2) generating an error.
7408 Given no good solution, I think the safest thing is one warning.
7409 The user won't be able to use -Werror, but....
7411 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7412 called in response to actually generating a caller or callee that
7413 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7414 via aggregate_value_p for general type probing from tree-ssa. */
7417 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7419 static bool warnedsse, warnedmmx;
7421 if (!TARGET_64BIT && type)
7423 /* Look at the return type of the function, not the function type. */
7424 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7426 if (!TARGET_SSE && !warnedsse)
7429 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7432 warning (0, "SSE vector return without SSE enabled "
7437 if (!TARGET_MMX && !warnedmmx)
7439 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7442 warning (0, "MMX vector return without MMX enabled "
7452 /* Create the va_list data type. */
/* Return the calling-convention-specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */

static tree
7458 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7460 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7462 /* For i386 we use plain pointer to argument area. */
7463 if (!TARGET_64BIT || abi == MS_ABI)
7464 return build_pointer_type (char_type_node);
7466 record = lang_hooks.types.make_type (RECORD_TYPE);
7467 type_decl = build_decl (BUILTINS_LOCATION,
7468 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7470 f_gpr = build_decl (BUILTINS_LOCATION,
7471 FIELD_DECL, get_identifier ("gp_offset"),
7472 unsigned_type_node);
7473 f_fpr = build_decl (BUILTINS_LOCATION,
7474 FIELD_DECL, get_identifier ("fp_offset"),
7475 unsigned_type_node);
7476 f_ovf = build_decl (BUILTINS_LOCATION,
7477 FIELD_DECL, get_identifier ("overflow_arg_area"),
7479 f_sav = build_decl (BUILTINS_LOCATION,
7480 FIELD_DECL, get_identifier ("reg_save_area"),
7483 va_list_gpr_counter_field = f_gpr;
7484 va_list_fpr_counter_field = f_fpr;
7486 DECL_FIELD_CONTEXT (f_gpr) = record;
7487 DECL_FIELD_CONTEXT (f_fpr) = record;
7488 DECL_FIELD_CONTEXT (f_ovf) = record;
7489 DECL_FIELD_CONTEXT (f_sav) = record;
7491 TYPE_STUB_DECL (record) = type_decl;
7492 TYPE_NAME (record) = type_decl;
7493 TYPE_FIELDS (record) = f_gpr;
7494 DECL_CHAIN (f_gpr) = f_fpr;
7495 DECL_CHAIN (f_fpr) = f_ovf;
7496 DECL_CHAIN (f_ovf) = f_sav;
7498 layout_type (record);
7500 /* The correct type is an array type of one element. */
7501 return build_array_type (record, build_index_type (size_zero_node));
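/* The record built above corresponds to the psABI declaration (shown
   here for illustration):

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   the array-of-one-element indirection is what makes va_list decay to a
   pointer when it is passed around.  */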
7504 /* Setup the builtin va_list data type and for 64-bit the additional
7505 calling convention specific va_list data types. */
7508 ix86_build_builtin_va_list (void)
7510 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7512 /* Initialize abi specific va_list builtin types. */
7516 if (ix86_abi == MS_ABI)
7518 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7519 if (TREE_CODE (t) != RECORD_TYPE)
7520 t = build_variant_type_copy (t);
7521 sysv_va_list_type_node = t;
7526 if (TREE_CODE (t) != RECORD_TYPE)
7527 t = build_variant_type_copy (t);
7528 sysv_va_list_type_node = t;
7530 if (ix86_abi != MS_ABI)
7532 t = ix86_build_builtin_va_list_abi (MS_ABI);
7533 if (TREE_CODE (t) != RECORD_TYPE)
7534 t = build_variant_type_copy (t);
7535 ms_va_list_type_node = t;
7540 if (TREE_CODE (t) != RECORD_TYPE)
7541 t = build_variant_type_copy (t);
7542 ms_va_list_type_node = t;
7549 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7552 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7558 /* GPR size of varargs save area. */
7559 if (cfun->va_list_gpr_size)
7560 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7562 ix86_varargs_gpr_size = 0;
7564 /* FPR size of varargs save area. We don't need it if we don't pass
7565 anything in SSE registers. */
7566 if (TARGET_SSE && cfun->va_list_fpr_size)
7567 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7569 ix86_varargs_fpr_size = 0;
7571 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7574 save_area = frame_pointer_rtx;
7575 set = get_varargs_alias_set ();
7577 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7578 if (max > X86_64_REGPARM_MAX)
7579 max = X86_64_REGPARM_MAX;
7581 for (i = cum->regno; i < max; i++)
7583 mem = gen_rtx_MEM (Pmode,
7584 plus_constant (save_area, i * UNITS_PER_WORD));
7585 MEM_NOTRAP_P (mem) = 1;
7586 set_mem_alias_set (mem, set);
7587 emit_move_insn (mem, gen_rtx_REG (Pmode,
7588 x86_64_int_parameter_registers[i]));
7591 if (ix86_varargs_fpr_size)
7593 enum machine_mode smode;
7596 /* Now emit code to save SSE registers. The AX parameter contains number
7597 of SSE parameter registers used to call this function, though all we
7598 actually check here is the zero/non-zero status. */
7600 label = gen_label_rtx ();
7601 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7602 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7605 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7606 we used movdqa (i.e. TImode) instead? Perhaps even better would
7607 be if we could determine the real mode of the data, via a hook
7608 into pass_stdarg. Ignore all that for now. */
7610 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7611 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7613 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7614 if (max > X86_64_SSE_REGPARM_MAX)
7615 max = X86_64_SSE_REGPARM_MAX;
7617 for (i = cum->sse_regno; i < max; ++i)
7619 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7620 mem = gen_rtx_MEM (smode, mem);
7621 MEM_NOTRAP_P (mem) = 1;
7622 set_mem_alias_set (mem, set);
7623 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7625 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
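/* Resulting save-area layout (a sketch of the SysV arrangement assumed
   by the offsets above): six GPRs of 8 bytes at offsets 0..40, followed
   by up to eight SSE registers of 16 bytes each at offsets 48, 64, ...,
   for at most 48 + 128 = 176 bytes.  */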
7633 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7635 alias_set_type set = get_varargs_alias_set ();
  /* Reset to zero, as there might be a sysv vaarg used
     before.  */
7640 ix86_varargs_gpr_size = 0;
7641 ix86_varargs_fpr_size = 0;
7643 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7647 mem = gen_rtx_MEM (Pmode,
7648 plus_constant (virtual_incoming_args_rtx,
7649 i * UNITS_PER_WORD));
7650 MEM_NOTRAP_P (mem) = 1;
7651 set_mem_alias_set (mem, set);
7653 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7654 emit_move_insn (mem, reg);
7659 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7660 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7663 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7664 CUMULATIVE_ARGS next_cum;
7667 /* This argument doesn't appear to be used anymore. Which is good,
7668 because the old code here didn't suppress rtl generation. */
7669 gcc_assert (!no_rtl);
7674 fntype = TREE_TYPE (current_function_decl);
7676 /* For varargs, we do not want to skip the dummy va_dcl argument.
7677 For stdargs, we do want to skip the last named argument. */
7679 if (stdarg_p (fntype))
7680 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7683 if (cum->call_abi == MS_ABI)
7684 setup_incoming_varargs_ms_64 (&next_cum);
7686 setup_incoming_varargs_64 (&next_cum);
7689 /* Checks if TYPE is of kind va_list char *. */
7692 is_va_list_char_pointer (tree type)
7696 /* For 32-bit it is always true. */
7699 canonic = ix86_canonical_va_list_type (type);
7700 return (canonic == ms_va_list_type_node
7701 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7704 /* Implement va_start. */
7707 ix86_va_start (tree valist, rtx nextarg)
7709 HOST_WIDE_INT words, n_gpr, n_fpr;
7710 tree f_gpr, f_fpr, f_ovf, f_sav;
7711 tree gpr, fpr, ovf, sav, t;
7715 if (flag_split_stack
7716 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7718 unsigned int scratch_regno;
7720 /* When we are splitting the stack, we can't refer to the stack
7721 arguments using internal_arg_pointer, because they may be on
7722 the old stack. The split stack prologue will arrange to
7723 leave a pointer to the old stack arguments in a scratch
7724 register, which we here copy to a pseudo-register. The split
7725 stack prologue can't set the pseudo-register directly because
7726 it (the prologue) runs before any registers have been saved. */
7728 scratch_regno = split_stack_prologue_scratch_regno ();
7729 if (scratch_regno != INVALID_REGNUM)
7733 reg = gen_reg_rtx (Pmode);
7734 cfun->machine->split_stack_varargs_pointer = reg;
7737 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7741 push_topmost_sequence ();
7742 emit_insn_after (seq, entry_of_function ());
7743 pop_topmost_sequence ();
  /* Only the 64-bit target needs something special.  */
7748 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7750 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7751 std_expand_builtin_va_start (valist, nextarg);
7756 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7757 next = expand_binop (ptr_mode, add_optab,
7758 cfun->machine->split_stack_varargs_pointer,
7759 crtl->args.arg_offset_rtx,
7760 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7761 convert_move (va_r, next, 0);
7766 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7767 f_fpr = DECL_CHAIN (f_gpr);
7768 f_ovf = DECL_CHAIN (f_fpr);
7769 f_sav = DECL_CHAIN (f_ovf);
7771 valist = build_simple_mem_ref (valist);
7772 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7773 /* The following should be folded into the MEM_REF offset. */
7774 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7776 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7778 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7780 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7783 /* Count number of gp and fp argument registers used. */
7784 words = crtl->args.info.words;
7785 n_gpr = crtl->args.info.regno;
7786 n_fpr = crtl->args.info.sse_regno;
7788 if (cfun->va_list_gpr_size)
7790 type = TREE_TYPE (gpr);
7791 t = build2 (MODIFY_EXPR, type,
7792 gpr, build_int_cst (type, n_gpr * 8));
7793 TREE_SIDE_EFFECTS (t) = 1;
7794 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7797 if (TARGET_SSE && cfun->va_list_fpr_size)
7799 type = TREE_TYPE (fpr);
7800 t = build2 (MODIFY_EXPR, type, fpr,
7801 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7802 TREE_SIDE_EFFECTS (t) = 1;
7803 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7806 /* Find the overflow area. */
7807 type = TREE_TYPE (ovf);
7808 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7809 ovf_rtx = crtl->args.internal_arg_pointer;
7811 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7812 t = make_tree (type, ovf_rtx);
7814 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7815 t = build2 (MODIFY_EXPR, type, ovf, t);
7816 TREE_SIDE_EFFECTS (t) = 1;
7817 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7819 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
      /* Find the register save area.
	 The function prologue saves it right above the stack frame.  */
7823 type = TREE_TYPE (sav);
7824 t = make_tree (type, frame_pointer_rtx);
7825 if (!ix86_varargs_gpr_size)
7826 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7827 t = build2 (MODIFY_EXPR, type, sav, t);
7828 TREE_SIDE_EFFECTS (t) = 1;
7829 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
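/* Worked example (the prototype is illustrative): in

     void log_msg (const char *fmt, double scale, ...);

   one GPR and one SSE register are consumed by the named arguments, so
   va_start stores gp_offset = 8 and fp_offset = 48 + 16 = 64, points
   overflow_arg_area just past the named stack words, and reg_save_area
   at the block spilled by the prologue.  */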
7833 /* Implement va_arg. */
7836 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7839 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7840 tree f_gpr, f_fpr, f_ovf, f_sav;
7841 tree gpr, fpr, ovf, sav, t;
7843 tree lab_false, lab_over = NULL_TREE;
7848 enum machine_mode nat_mode;
7849 unsigned int arg_boundary;
  /* Only the 64-bit target needs something special.  */
7852 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7853 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7855 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7856 f_fpr = DECL_CHAIN (f_gpr);
7857 f_ovf = DECL_CHAIN (f_fpr);
7858 f_sav = DECL_CHAIN (f_ovf);
7860 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7861 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7862 valist = build_va_arg_indirect_ref (valist);
7863 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7864 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7865 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7867 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7869 type = build_pointer_type (type);
7870 size = int_size_in_bytes (type);
7871 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7873 nat_mode = type_natural_mode (type, NULL);
  /* Unnamed 256bit vector mode parameters are passed on the stack.  */
7883 if (!TARGET_64BIT_MS_ABI)
7890 container = construct_container (nat_mode, TYPE_MODE (type),
7891 type, 0, X86_64_REGPARM_MAX,
7892 X86_64_SSE_REGPARM_MAX, intreg,
7897 /* Pull the value out of the saved registers. */
7899 addr = create_tmp_var (ptr_type_node, "addr");
7903 int needed_intregs, needed_sseregs;
7905 tree int_addr, sse_addr;
7907 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7908 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7910 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7912 need_temp = (!REG_P (container)
7913 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7914 || TYPE_ALIGN (type) > 128));
      /* In case we are passing a structure, verify that it is a consecutive
	 block on the register save area.  If not, we need to do moves.  */
7918 if (!need_temp && !REG_P (container))
7920 /* Verify that all registers are strictly consecutive */
7921 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7925 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7927 rtx slot = XVECEXP (container, 0, i);
7928 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7929 || INTVAL (XEXP (slot, 1)) != i * 16)
7937 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7939 rtx slot = XVECEXP (container, 0, i);
7940 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7941 || INTVAL (XEXP (slot, 1)) != i * 8)
7953 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7954 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7957 /* First ensure that we fit completely in registers. */
7960 t = build_int_cst (TREE_TYPE (gpr),
7961 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7962 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7963 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7964 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7965 gimplify_and_add (t, pre_p);
7969 t = build_int_cst (TREE_TYPE (fpr),
7970 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7971 + X86_64_REGPARM_MAX * 8);
7972 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7973 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7974 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7975 gimplify_and_add (t, pre_p);
7978 /* Compute index to start of area used for integer regs. */
7981 /* int_addr = gpr + sav; */
7982 t = fold_build_pointer_plus (sav, gpr);
7983 gimplify_assign (int_addr, t, pre_p);
7987 /* sse_addr = fpr + sav; */
7988 t = fold_build_pointer_plus (sav, fpr);
7989 gimplify_assign (sse_addr, t, pre_p);
7993 int i, prev_size = 0;
7994 tree temp = create_tmp_var (type, "va_arg_tmp");
7997 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7998 gimplify_assign (addr, t, pre_p);
8000 for (i = 0; i < XVECLEN (container, 0); i++)
8002 rtx slot = XVECEXP (container, 0, i);
8003 rtx reg = XEXP (slot, 0);
8004 enum machine_mode mode = GET_MODE (reg);
8010 tree dest_addr, dest;
8011 int cur_size = GET_MODE_SIZE (mode);
8013 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8014 prev_size = INTVAL (XEXP (slot, 1));
8015 if (prev_size + cur_size > size)
8017 cur_size = size - prev_size;
8018 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8019 if (mode == BLKmode)
8022 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8023 if (mode == GET_MODE (reg))
8024 addr_type = build_pointer_type (piece_type);
8026 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8028 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8031 if (SSE_REGNO_P (REGNO (reg)))
8033 src_addr = sse_addr;
8034 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8038 src_addr = int_addr;
8039 src_offset = REGNO (reg) * 8;
8041 src_addr = fold_convert (addr_type, src_addr);
8042 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8044 dest_addr = fold_convert (daddr_type, addr);
8045 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8046 if (cur_size == GET_MODE_SIZE (mode))
8048 src = build_va_arg_indirect_ref (src_addr);
8049 dest = build_va_arg_indirect_ref (dest_addr);
8051 gimplify_assign (dest, src, pre_p);
8056 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8057 3, dest_addr, src_addr,
8058 size_int (cur_size));
8059 gimplify_and_add (copy, pre_p);
8061 prev_size += cur_size;
8067 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8068 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8069 gimplify_assign (gpr, t, pre_p);
8074 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8075 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8076 gimplify_assign (fpr, t, pre_p);
8079 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8081 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8084 /* ... otherwise out of the overflow area. */
8086 /* When we align a parameter on the stack for the caller, if the
8087 parameter alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will
8088 be aligned at MAX_SUPPORTED_STACK_ALIGNMENT only. Match the callee
8089 here with the caller. */
8090 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8091 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8092 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8094 /* Care for on-stack alignment if needed. */
8095 if (arg_boundary <= 64 || size == 0)
8099 HOST_WIDE_INT align = arg_boundary / 8;
8100 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8101 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8102 build_int_cst (TREE_TYPE (t), -align));
8105 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8106 gimplify_assign (addr, t, pre_p);
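/* The round-up just gimplified is the usual two's-complement idiom
   aligned = (ovf + align - 1) & -align.  For example, with a 16-byte
   align and ovf == 20, (20 + 15) & -16 == 35 & ~15 == 32, i.e. the
   next 16-byte boundary at or above ovf.  */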
8108 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8109 gimplify_assign (unshare_expr (ovf), t, pre_p);
8112 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8114 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8115 addr = fold_convert (ptrtype, addr);
8118 addr = build_va_arg_indirect_ref (addr);
8119 return build_va_arg_indirect_ref (addr);
8122 /* Return true if OPNUM's MEM should be matched
8123 in movabs* patterns. */
8126 ix86_check_movabs (rtx insn, int opnum)
8130 set = PATTERN (insn);
8131 if (GET_CODE (set) == PARALLEL)
8132 set = XVECEXP (set, 0, 0);
8133 gcc_assert (GET_CODE (set) == SET);
8134 mem = XEXP (set, opnum);
8135 while (GET_CODE (mem) == SUBREG)
8136 mem = SUBREG_REG (mem);
8137 gcc_assert (MEM_P (mem));
8138 return volatile_ok || !MEM_VOLATILE_P (mem);
8141 /* Initialize the table of extra 80387 mathematical constants. */
8144 init_ext_80387_constants (void)
8146 static const char * cst[5] =
8148 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8149 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8150 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8151 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8152 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8156 for (i = 0; i < 5; i++)
8158 real_from_string (&ext_80387_constants_table[i], cst[i]);
8159 /* Ensure each constant is rounded to XFmode precision. */
8160 real_convert (&ext_80387_constants_table[i],
8161 XFmode, &ext_80387_constants_table[i]);
8164 ext_80387_constants_init = 1;
8167 /* Return non-zero if the constant is something that
8168 can be loaded with a special instruction. */
8171 standard_80387_constant_p (rtx x)
8173 enum machine_mode mode = GET_MODE (x);
8177 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8180 if (x == CONST0_RTX (mode))
8182 if (x == CONST1_RTX (mode))
8185 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8187 /* For XFmode constants, try to find a special 80387 instruction when
8188 optimizing for size or on those CPUs that benefit from them. */
8190 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8194 if (! ext_80387_constants_init)
8195 init_ext_80387_constants ();
8197 for (i = 0; i < 5; i++)
8198 if (real_identical (&r, &ext_80387_constants_table[i]))
8202 /* A load of the constant -0.0 or -1.0 will be split into an
8203 fldz;fchs or fld1;fchs sequence. */
8204 if (real_isnegzero (&r))
8206 if (real_identical (&r, &dconstm1))
8212 /* Return the opcode of the special instruction to be used to load
8216 standard_80387_constant_opcode (rtx x)
8218 switch (standard_80387_constant_p (x))
8242 /* Return the CONST_DOUBLE representing the 80387 constant that is
8243 loaded by the specified special instruction. The argument IDX
8244 matches the return value from standard_80387_constant_p. */
8247 standard_80387_constant_rtx (int idx)
8251 if (! ext_80387_constants_init)
8252 init_ext_80387_constants ();
8268 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8272 /* Return 1 if X is all 0s and 2 if X is all 1s
8273 in a supported SSE/AVX vector mode. */
8276 standard_sse_constant_p (rtx x)
8278 enum machine_mode mode = GET_MODE (x);
8280 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8282 if (vector_all_ones_operand (x, mode))
8304 /* Return the opcode of the special instruction to be used to load
8308 standard_sse_constant_opcode (rtx insn, rtx x)
8310 switch (standard_sse_constant_p (x))
8313 switch (get_attr_mode (insn))
8316 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8317 return "%vpxor\t%0, %d0";
8319 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8320 return "%vxorpd\t%0, %d0";
8322 return "%vxorps\t%0, %d0";
8325 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8326 return "vpxor\t%x0, %x0, %x0";
8328 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8329 return "vxorpd\t%x0, %x0, %x0";
8331 return "vxorps\t%x0, %x0, %x0";
8339 return "vpcmpeqd\t%0, %0, %0";
8341 return "pcmpeqd\t%0, %0";
8349 /* Return true if OP contains a symbol reference. */
8352 symbolic_reference_mentioned_p (rtx op)
8357 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8360 fmt = GET_RTX_FORMAT (GET_CODE (op));
8361 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8367 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8368 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8372 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8379 /* Return true if it is appropriate to emit `ret' instructions in the
8380 body of a function. Do this only if the epilogue is simple, needing a
8381 couple of insns. Prior to reloading, we can't tell how many registers
8382 must be saved, so return false then. Return false if there is no frame
8383 marker to de-allocate. */
8386 ix86_can_use_return_insn_p (void)
8388 struct ix86_frame frame;
8390 if (! reload_completed || frame_pointer_needed)
8393 /* Don't allow more than 32k pop, since that's all we can do
8394 with one instruction. */
8395 if (crtl->args.pops_args && crtl->args.size >= 32768)
8398 ix86_compute_frame_layout (&frame);
8399 return (frame.stack_pointer_offset == UNITS_PER_WORD
8400 && (frame.nregs + frame.nsseregs) == 0);
8403 /* Value should be nonzero if functions must have frame pointers.
8404 Zero means the frame pointer need not be set up (and parms may
8405 be accessed via the stack pointer) in functions that seem suitable. */
8408 ix86_frame_pointer_required (void)
8410 /* If we accessed previous frames, then the generated code expects
8411 to be able to access the saved ebp value in our frame. */
8412 if (cfun->machine->accesses_prev_frame)
8415 /* Several x86 OSes need a frame pointer for other reasons,
8416 usually pertaining to setjmp. */
8417 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8420 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8421 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
8424 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
8425 allocation is 4GB. */
8426 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
8429 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8430 turns off the frame pointer by default. Turn it back on now if
8431 we've not got a leaf function. */
8432 if (TARGET_OMIT_LEAF_FRAME_POINTER
8433 && (!current_function_is_leaf
8434 || ix86_current_function_calls_tls_descriptor))
8437 if (crtl->profile && !flag_fentry)
8443 /* Record that the current function accesses previous call frames. */
8446 ix86_setup_frame_addresses (void)
8448 cfun->machine->accesses_prev_frame = 1;
8451 #ifndef USE_HIDDEN_LINKONCE
8452 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8453 # define USE_HIDDEN_LINKONCE 1
8455 # define USE_HIDDEN_LINKONCE 0
8459 static int pic_labels_used;
8461 /* Fill in the label name that should be used for a pc thunk for
8462 the given register. */
8465 get_pc_thunk_name (char name[32], unsigned int regno)
8467 gcc_assert (!TARGET_64BIT);
8469 if (USE_HIDDEN_LINKONCE)
8470 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
8472 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
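/* For instance (illustrative), for %ebx this yields the global name
   "__x86.get_pc_thunk.bx" when hidden linkonce definitions are
   usable, and otherwise an internal label built from the "LPR"
   prefix and the register number.  */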
8476 /* This function generates code for -fpic that loads %ebx with
8477 the return address of the caller and then returns. */
8480 ix86_code_end (void)
8485 for (regno = AX_REG; regno <= SP_REG; regno++)
8490 if (!(pic_labels_used & (1 << regno)))
8493 get_pc_thunk_name (name, regno);
8495 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8496 get_identifier (name),
8497 build_function_type_list (void_type_node, NULL_TREE));
8498 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8499 NULL_TREE, void_type_node);
8500 TREE_PUBLIC (decl) = 1;
8501 TREE_STATIC (decl) = 1;
8506 switch_to_section (darwin_sections[text_coal_section]);
8507 fputs ("\t.weak_definition\t", asm_out_file);
8508 assemble_name (asm_out_file, name);
8509 fputs ("\n\t.private_extern\t", asm_out_file);
8510 assemble_name (asm_out_file, name);
8511 putc ('\n', asm_out_file);
8512 ASM_OUTPUT_LABEL (asm_out_file, name);
8513 DECL_WEAK (decl) = 1;
8517 if (USE_HIDDEN_LINKONCE)
8519 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8521 targetm.asm_out.unique_section (decl, 0);
8522 switch_to_section (get_named_section (decl, NULL, 0));
8524 targetm.asm_out.globalize_label (asm_out_file, name);
8525 fputs ("\t.hidden\t", asm_out_file);
8526 assemble_name (asm_out_file, name);
8527 putc ('\n', asm_out_file);
8528 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8532 switch_to_section (text_section);
8533 ASM_OUTPUT_LABEL (asm_out_file, name);
8536 DECL_INITIAL (decl) = make_node (BLOCK);
8537 current_function_decl = decl;
8538 init_function_start (decl);
8539 first_function_block_is_cold = false;
8540 /* Make sure unwind info is emitted for the thunk if needed. */
8541 final_start_function (emit_barrier (), asm_out_file, 1);
8543 /* Pad stack IP move with 4 instructions (two NOPs count
8544 as one instruction). */
8545 if (TARGET_PAD_SHORT_FUNCTION)
8550 fputs ("\tnop\n", asm_out_file);
8553 xops[0] = gen_rtx_REG (Pmode, regno);
8554 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8555 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8556 fputs ("\tret\n", asm_out_file);
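/* The body emitted above is minimal; for %ebx it is just (AT&T
   syntax, illustrative):

        __x86.get_pc_thunk.bx:
                movl    (%esp), %ebx
                ret

   i.e. the thunk copies its own return address, which is the address
   of the instruction following the call, into the destination
   register.  */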
8557 final_end_function ();
8558 init_insn_lengths ();
8559 free_after_compilation (cfun);
8561 current_function_decl = NULL;
8564 if (flag_split_stack)
8565 file_end_indicate_split_stack ();
8568 /* Emit code for the SET_GOT patterns. */
8571 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8577 if (TARGET_VXWORKS_RTP && flag_pic)
8579 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8580 xops[2] = gen_rtx_MEM (Pmode,
8581 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8582 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8584 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8585 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8586 an unadorned address. */
8587 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8588 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8589 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8593 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8597 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8599 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8602 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8603 is what will be referenced by the Mach-O PIC subsystem. */
8605 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8608 targetm.asm_out.internal_label (asm_out_file, "L",
8609 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8614 get_pc_thunk_name (name, REGNO (dest));
8615 pic_labels_used |= 1 << REGNO (dest);
8617 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8618 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8619 output_asm_insn ("call\t%X2", xops);
8620 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8621 is what will be referenced by the Mach-O PIC subsystem. */
8624 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8626 targetm.asm_out.internal_label (asm_out_file, "L",
8627 CODE_LABEL_NUMBER (label));
8632 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
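/* Putting the pieces together, the typical ELF/PIC sequence emitted
   by this function is (illustrative):

        call    __x86.get_pc_thunk.bx
        addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   which leaves the GOT pointer in %ebx.  */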
8637 /* Generate a "push" pattern for input ARG. */
8642 struct machine_function *m = cfun->machine;
8644 if (m->fs.cfa_reg == stack_pointer_rtx)
8645 m->fs.cfa_offset += UNITS_PER_WORD;
8646 m->fs.sp_offset += UNITS_PER_WORD;
8648 return gen_rtx_SET (VOIDmode,
8650 gen_rtx_PRE_DEC (Pmode,
8651 stack_pointer_rtx)),
8655 /* Generate a "pop" pattern for input ARG. */
8660 return gen_rtx_SET (VOIDmode,
8663 gen_rtx_POST_INC (Pmode,
8664 stack_pointer_rtx)));
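/* Both patterns use the canonical auto-modify address forms, roughly
   (illustrative):

     push:  (set (mem (pre_dec (reg sp))) (reg arg))
     pop:   (set (reg arg) (mem (post_inc (reg sp))))

   so a single insn both moves the value and adjusts the stack
   pointer.  */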
8667 /* Return >= 0 if there is an unused call-clobbered register available
8668 for the entire function. */
8671 ix86_select_alt_pic_regnum (void)
8673 if (current_function_is_leaf
8675 && !ix86_current_function_calls_tls_descriptor)
8678 /* Can't use the same register for both PIC and DRAP. */
8680 drap = REGNO (crtl->drap_reg);
8683 for (i = 2; i >= 0; --i)
8684 if (i != drap && !df_regs_ever_live_p (i))
8688 return INVALID_REGNUM;
8691 /* Return TRUE if we need to save REGNO. */
8694 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8696 if (pic_offset_table_rtx
8697 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8698 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8700 || crtl->calls_eh_return
8701 || crtl->uses_const_pool))
8702 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8704 if (crtl->calls_eh_return && maybe_eh_return)
8709 unsigned test = EH_RETURN_DATA_REGNO (i);
8710 if (test == INVALID_REGNUM)
8717 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8720 return (df_regs_ever_live_p (regno)
8721 && !call_used_regs[regno]
8722 && !fixed_regs[regno]
8723 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8726 /* Return the number of saved general purpose registers. */
8729 ix86_nsaved_regs (void)
8734 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8735 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8740 /* Return the number of saved SSE registers. */
8743 ix86_nsaved_sseregs (void)
8748 if (!TARGET_64BIT_MS_ABI)
8750 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8751 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8756 /* Given FROM and TO register numbers, say whether this elimination is
8757 allowed. If stack alignment is needed, we can only replace argument
8758 pointer with hard frame pointer, or replace frame pointer with stack
8759 pointer. Otherwise, frame pointer elimination is automatically
8760 handled and all other eliminations are valid. */
8763 ix86_can_eliminate (const int from, const int to)
8765 if (stack_realign_fp)
8766 return ((from == ARG_POINTER_REGNUM
8767 && to == HARD_FRAME_POINTER_REGNUM)
8768 || (from == FRAME_POINTER_REGNUM
8769 && to == STACK_POINTER_REGNUM));
8771 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8774 /* Return the offset between two registers, one to be eliminated, and the other
8775 its replacement, at the start of a routine. */
8778 ix86_initial_elimination_offset (int from, int to)
8780 struct ix86_frame frame;
8781 ix86_compute_frame_layout (&frame);
8783 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8784 return frame.hard_frame_pointer_offset;
8785 else if (from == FRAME_POINTER_REGNUM
8786 && to == HARD_FRAME_POINTER_REGNUM)
8787 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8790 gcc_assert (to == STACK_POINTER_REGNUM);
8792 if (from == ARG_POINTER_REGNUM)
8793 return frame.stack_pointer_offset;
8795 gcc_assert (from == FRAME_POINTER_REGNUM);
8796 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8800 /* In a dynamically-aligned function, we can't know the offset from
8801 stack pointer to frame pointer, so we must ensure that setjmp
8802 eliminates fp against the hard fp (%ebp) rather than trying to
8803 index from %esp up to the top of the frame across a gap that is
8804 of unknown (at compile-time) size. */
8806 ix86_builtin_setjmp_frame_value (void)
8808 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8811 /* When using -fsplit-stack, the allocation routines set a field in
8812 the TCB to the bottom of the stack plus this much space, measured in bytes. */
8815 #define SPLIT_STACK_AVAILABLE 256
8817 /* Fill the ix86_frame structure FRAME with the frame layout of the function currently being compiled. */
8820 ix86_compute_frame_layout (struct ix86_frame *frame)
8822 unsigned int stack_alignment_needed;
8823 HOST_WIDE_INT offset;
8824 unsigned int preferred_alignment;
8825 HOST_WIDE_INT size = get_frame_size ();
8826 HOST_WIDE_INT to_allocate;
8828 frame->nregs = ix86_nsaved_regs ();
8829 frame->nsseregs = ix86_nsaved_sseregs ();
8831 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8832 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8834 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
8835 except in function prologues and in leaf functions. */
8836 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8837 && (!current_function_is_leaf || cfun->calls_alloca != 0
8838 || ix86_current_function_calls_tls_descriptor))
8840 preferred_alignment = 16;
8841 stack_alignment_needed = 16;
8842 crtl->preferred_stack_boundary = 128;
8843 crtl->stack_alignment_needed = 128;
8846 gcc_assert (!size || stack_alignment_needed);
8847 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8848 gcc_assert (preferred_alignment <= stack_alignment_needed);
8850 /* For SEH we have to limit the amount of code movement into the prologue.
8851 At present we do this via a BLOCKAGE, at which point there's very little
8852 scheduling that can be done, which means that there's very little point
8853 in doing anything except PUSHs. */
8855 cfun->machine->use_fast_prologue_epilogue = false;
8857 /* During reload iteration the number of registers saved can change.
8858 Recompute the value as needed. Do not recompute when the number of registers
8859 didn't change, as reload does multiple calls to the function and does not
8860 expect the decision to change within a single iteration. */
8861 else if (!optimize_function_for_size_p (cfun)
8862 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8864 int count = frame->nregs;
8865 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8867 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8869 /* The fast prologue uses move instead of push to save registers. This
8870 is significantly longer, but also executes faster, as modern hardware
8871 can execute the moves in parallel but cannot do so for push/pop.
8873 Be careful about choosing which prologue to emit: when the function
8874 takes many instructions to execute we may use the slow version, and
8875 likewise when the function is known to be outside a hot spot (this is
8876 known with feedback only). Weight the size of the function by the
8877 number of registers to save, as it is cheap to use one or two push
8878 instructions but very slow to use many of them. */
8880 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8881 if (node->frequency < NODE_FREQUENCY_NORMAL
8882 || (flag_branch_probabilities
8883 && node->frequency < NODE_FREQUENCY_HOT))
8884 cfun->machine->use_fast_prologue_epilogue = false;
8886 cfun->machine->use_fast_prologue_epilogue
8887 = !expensive_function_p (count);
8890 frame->save_regs_using_mov
8891 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
8892 /* If static stack checking is enabled and done with probes,
8893 the registers need to be saved before allocating the frame. */
8894 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
8896 /* Skip return address. */
8897 offset = UNITS_PER_WORD;
8899 /* Skip pushed static chain. */
8900 if (ix86_static_chain_on_stack)
8901 offset += UNITS_PER_WORD;
8903 /* Skip saved base pointer. */
8904 if (frame_pointer_needed)
8905 offset += UNITS_PER_WORD;
8906 frame->hfp_save_offset = offset;
8908 /* The traditional frame pointer location is at the top of the frame. */
8909 frame->hard_frame_pointer_offset = offset;
8911 /* Register save area */
8912 offset += frame->nregs * UNITS_PER_WORD;
8913 frame->reg_save_offset = offset;
8915 /* On SEH target, registers are pushed just before the frame pointer location. */
8918 frame->hard_frame_pointer_offset = offset;
8920 /* Align and set SSE register save area. */
8921 if (frame->nsseregs)
8923 /* The only ABI that has saved SSE registers (Win64) also has a
8924 16-byte aligned default stack, and thus we don't need to be
8925 within the re-aligned local stack frame to save them. */
8926 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8927 offset = (offset + 16 - 1) & -16;
8928 offset += frame->nsseregs * 16;
8930 frame->sse_reg_save_offset = offset;
8932 /* The re-aligned stack starts here. Values before this point are not
8933 directly comparable with values below this point. In order to make
8934 sure that no value happens to be the same before and after, force
8935 the alignment computation below to add a non-zero value. */
8936 if (stack_realign_fp)
8937 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8940 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8941 offset += frame->va_arg_size;
8943 /* Align start of frame for local function. */
8944 if (stack_realign_fp
8945 || offset != frame->sse_reg_save_offset
8947 || !current_function_is_leaf
8948 || cfun->calls_alloca
8949 || ix86_current_function_calls_tls_descriptor)
8950 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8952 /* Frame pointer points here. */
8953 frame->frame_pointer_offset = offset;
8957 /* Add the outgoing arguments area. It can be skipped if we eliminated
8958 all the function calls as dead code.
8959 Skipping is however impossible when the function calls alloca, as the
8960 alloca expander assumes that the last crtl->outgoing_args_size bytes
8961 of the stack frame are unused. */
8962 if (ACCUMULATE_OUTGOING_ARGS
8963 && (!current_function_is_leaf || cfun->calls_alloca
8964 || ix86_current_function_calls_tls_descriptor))
8966 offset += crtl->outgoing_args_size;
8967 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8970 frame->outgoing_arguments_size = 0;
8972 /* Align the stack boundary. Only needed if we're calling another function or using alloca. */
8974 if (!current_function_is_leaf || cfun->calls_alloca
8975 || ix86_current_function_calls_tls_descriptor)
8976 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8978 /* We've reached the end of the stack frame. */
8979 frame->stack_pointer_offset = offset;
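/* An illustrative 64-bit layout: a non-leaf function with two saved
   GPRs, no SSE saves, no varargs and 16 bytes of locals starts at
   offset 8 (the return address), reaches reg_save_offset == 24 after
   the register save area, is rounded up to the 16-byte boundary at
   32 for the start of the local frame, and ends with
   stack_pointer_offset == 48; the prologue then allocates
   48 - 24 == 24 bytes.  */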
8981 /* Size the prologue needs to allocate. */
8982 to_allocate = offset - frame->sse_reg_save_offset;
8984 if ((!to_allocate && frame->nregs <= 1)
8985 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8986 frame->save_regs_using_mov = false;
8988 if (ix86_using_red_zone ()
8989 && current_function_sp_is_unchanging
8990 && current_function_is_leaf
8991 && !ix86_current_function_calls_tls_descriptor)
8993 frame->red_zone_size = to_allocate;
8994 if (frame->save_regs_using_mov)
8995 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8996 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8997 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9000 frame->red_zone_size = 0;
9001 frame->stack_pointer_offset -= frame->red_zone_size;
9003 /* The SEH frame pointer location is near the bottom of the frame.
9004 This is enforced by the fact that the difference between the
9005 stack pointer and the frame pointer is limited to 240 bytes in
9006 the unwind data structure. */
9011 /* If we can leave the frame pointer where it is, do so. Doing so also
9012 gives the establisher frame for __builtin_frame_address (0). */
9013 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9014 if (diff <= SEH_MAX_FRAME_SIZE
9015 && (diff > 240 || (diff & 15) != 0)
9016 && !crtl->accesses_prior_frames)
9018 /* Ideally we'd determine what portion of the local stack frame
9019 (within the constraint of the lowest 240) is most heavily used.
9020 But without that complication, simply bias the frame pointer
9021 by 128 bytes so as to maximize the amount of the local stack
9022 frame that is addressable with 8-bit offsets. */
9023 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9028 /* This is semi-inlined memory_address_length, but simplified
9029 since we know that we're always dealing with reg+offset, and
9030 to avoid having to create and discard all that rtl. */
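/* For example (illustrative): 8(%ebp) needs only a disp8, so the
   extra length is 1; (%ebx) needs no displacement at all, length 0;
   (%esp) needs no displacement but does need a SIB byte, length 1;
   and 1024(%esp) needs a 4-byte displacement plus the SIB byte,
   length 5.  */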
9033 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9039 /* EBP and R13 cannot be encoded without an offset. */
9040 len = (regno == BP_REG || regno == R13_REG);
9042 else if (IN_RANGE (offset, -128, 127))
9045 /* ESP and R12 must be encoded with a SIB byte. */
9046 if (regno == SP_REG || regno == R12_REG)
9052 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9053 The valid base registers are taken from CFUN->MACHINE->FS. */
9056 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9058 const struct machine_function *m = cfun->machine;
9059 rtx base_reg = NULL;
9060 HOST_WIDE_INT base_offset = 0;
9062 if (m->use_fast_prologue_epilogue)
9064 /* Choose the base register most likely to allow the most scheduling
9065 opportunities. Generally FP is valid throughout the function,
9066 while DRAP must be reloaded within the epilogue. But choose either
9067 over the SP due to increased encoding size. */
9071 base_reg = hard_frame_pointer_rtx;
9072 base_offset = m->fs.fp_offset - cfa_offset;
9074 else if (m->fs.drap_valid)
9076 base_reg = crtl->drap_reg;
9077 base_offset = 0 - cfa_offset;
9079 else if (m->fs.sp_valid)
9081 base_reg = stack_pointer_rtx;
9082 base_offset = m->fs.sp_offset - cfa_offset;
9087 HOST_WIDE_INT toffset;
9090 /* Choose the base register with the smallest address encoding.
9091 With a tie, choose FP > DRAP > SP. */
9094 base_reg = stack_pointer_rtx;
9095 base_offset = m->fs.sp_offset - cfa_offset;
9096 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9098 if (m->fs.drap_valid)
9100 toffset = 0 - cfa_offset;
9101 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9104 base_reg = crtl->drap_reg;
9105 base_offset = toffset;
9111 toffset = m->fs.fp_offset - cfa_offset;
9112 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9115 base_reg = hard_frame_pointer_rtx;
9116 base_offset = toffset;
9121 gcc_assert (base_reg != NULL);
9123 return plus_constant (base_reg, base_offset);
9126 /* Emit code to save registers in the prologue. */
9129 ix86_emit_save_regs (void)
9134 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9135 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9137 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9138 RTX_FRAME_RELATED_P (insn) = 1;
9142 /* Emit a single register save at CFA - CFA_OFFSET. */
9145 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9146 HOST_WIDE_INT cfa_offset)
9148 struct machine_function *m = cfun->machine;
9149 rtx reg = gen_rtx_REG (mode, regno);
9150 rtx mem, addr, base, insn;
9152 addr = choose_baseaddr (cfa_offset);
9153 mem = gen_frame_mem (mode, addr);
9155 /* For SSE saves, we need to indicate the 128-bit alignment. */
9156 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9158 insn = emit_move_insn (mem, reg);
9159 RTX_FRAME_RELATED_P (insn) = 1;
9162 if (GET_CODE (base) == PLUS)
9163 base = XEXP (base, 0);
9164 gcc_checking_assert (REG_P (base));
9166 /* When saving registers into a re-aligned local stack frame, avoid
9167 any tricky guessing by dwarf2out. */
9168 if (m->fs.realigned)
9170 gcc_checking_assert (stack_realign_drap);
9172 if (regno == REGNO (crtl->drap_reg))
9174 /* A bit of a hack. We force the DRAP register to be saved in
9175 the re-aligned stack frame, which provides us with a copy
9176 of the CFA that will last past the prologue. Install it. */
9177 gcc_checking_assert (cfun->machine->fs.fp_valid);
9178 addr = plus_constant (hard_frame_pointer_rtx,
9179 cfun->machine->fs.fp_offset - cfa_offset);
9180 mem = gen_rtx_MEM (mode, addr);
9181 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9185 /* The frame pointer is a stable reference within the
9186 aligned frame. Use it. */
9187 gcc_checking_assert (cfun->machine->fs.fp_valid);
9188 addr = plus_constant (hard_frame_pointer_rtx,
9189 cfun->machine->fs.fp_offset - cfa_offset);
9190 mem = gen_rtx_MEM (mode, addr);
9191 add_reg_note (insn, REG_CFA_EXPRESSION,
9192 gen_rtx_SET (VOIDmode, mem, reg));
9196 /* The memory may not be relative to the current CFA register,
9197 which means that we may need to generate a new pattern for
9198 use by the unwind info. */
9199 else if (base != m->fs.cfa_reg)
9201 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9202 mem = gen_rtx_MEM (mode, addr);
9203 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9207 /* Emit code to save registers using MOV insns.
9208 First register is stored at CFA - CFA_OFFSET. */
9210 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9214 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9215 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9217 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9218 cfa_offset -= UNITS_PER_WORD;
9222 /* Emit code to save SSE registers using MOV insns.
9223 First register is stored at CFA - CFA_OFFSET. */
9225 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9229 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9230 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9232 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9237 static GTY(()) rtx queued_cfa_restores;
9239 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9240 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9241 Don't add the note if the previously saved value will be left untouched
9242 within the stack red zone until return, as unwinders can find the same
9243 value in the register and on the stack. */
9246 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9248 if (!crtl->shrink_wrapped
9249 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9254 add_reg_note (insn, REG_CFA_RESTORE, reg);
9255 RTX_FRAME_RELATED_P (insn) = 1;
9259 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9262 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9265 ix86_add_queued_cfa_restore_notes (rtx insn)
9268 if (!queued_cfa_restores)
9270 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9272 XEXP (last, 1) = REG_NOTES (insn);
9273 REG_NOTES (insn) = queued_cfa_restores;
9274 queued_cfa_restores = NULL_RTX;
9275 RTX_FRAME_RELATED_P (insn) = 1;
9278 /* Expand a prologue or epilogue stack adjustment.
9279 The pattern exists to put a dependency on all ebp-based memory accesses.
9280 STYLE should be negative if instructions should be marked as frame related,
9281 zero if the %r11 register is live and cannot be freely used, and positive
9285 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9286 int style, bool set_cfa)
9288 struct machine_function *m = cfun->machine;
9290 bool add_frame_related_expr = false;
9293 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9294 else if (x86_64_immediate_operand (offset, DImode))
9295 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9299 /* r11 is used by indirect sibcall return as well, set before the
9300 epilogue and used after the epilogue. */
9302 tmp = gen_rtx_REG (DImode, R11_REG);
9305 gcc_assert (src != hard_frame_pointer_rtx
9306 && dest != hard_frame_pointer_rtx);
9307 tmp = hard_frame_pointer_rtx;
9309 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9311 add_frame_related_expr = true;
9313 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9316 insn = emit_insn (insn);
9318 ix86_add_queued_cfa_restore_notes (insn);
9324 gcc_assert (m->fs.cfa_reg == src);
9325 m->fs.cfa_offset += INTVAL (offset);
9326 m->fs.cfa_reg = dest;
9328 r = gen_rtx_PLUS (Pmode, src, offset);
9329 r = gen_rtx_SET (VOIDmode, dest, r);
9330 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9331 RTX_FRAME_RELATED_P (insn) = 1;
9335 RTX_FRAME_RELATED_P (insn) = 1;
9336 if (add_frame_related_expr)
9338 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9339 r = gen_rtx_SET (VOIDmode, dest, r);
9340 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9344 if (dest == stack_pointer_rtx)
9346 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9347 bool valid = m->fs.sp_valid;
9349 if (src == hard_frame_pointer_rtx)
9351 valid = m->fs.fp_valid;
9352 ooffset = m->fs.fp_offset;
9354 else if (src == crtl->drap_reg)
9356 valid = m->fs.drap_valid;
9361 /* Else there are two possibilities: SP itself, which we set
9362 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9363 taken care of by hand along the eh_return path. */
9364 gcc_checking_assert (src == stack_pointer_rtx
9365 || offset == const0_rtx);
9368 m->fs.sp_offset = ooffset - INTVAL (offset);
9369 m->fs.sp_valid = valid;
9373 /* Find an available register to be used as dynamic realign argument
9374 pointer register. Such a register will be written in the prologue and
9375 used at the beginning of the body, so it must not be
9376 1. a parameter passing register.
9378 We reuse the static-chain register if it is available. Otherwise, we
9379 use DI for i386 and R13 for x86-64. We chose R13 since it has
9382 Return: the regno of the chosen register. */
9385 find_drap_reg (void)
9387 tree decl = cfun->decl;
9391 /* Use R13 for a nested function or a function that needs a static chain.
9392 Since a function with a tail call may use any caller-saved
9393 register in the epilogue, DRAP must not use a caller-saved
9394 register in such a case. */
9395 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9402 /* Use DI for a nested function or a function that needs a static chain.
9403 Since a function with a tail call may use any caller-saved
9404 register in the epilogue, DRAP must not use a caller-saved
9405 register in such a case. */
9406 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9409 /* Reuse the static chain register if it isn't used for parameter passing. */
9411 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9413 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9414 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9421 /* Return minimum incoming stack alignment. */
9424 ix86_minimum_incoming_stack_boundary (bool sibcall)
9426 unsigned int incoming_stack_boundary;
9428 /* Prefer the one specified at command line. */
9429 if (ix86_user_incoming_stack_boundary)
9430 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9431 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9432 if -mstackrealign is used, we aren't checking for a sibcall, and the
9433 estimated stack alignment is 128 bits. */
9436 && ix86_force_align_arg_pointer
9437 && crtl->stack_alignment_estimated == 128)
9438 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9440 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9442 /* Incoming stack alignment can be changed on individual functions
9443 via force_align_arg_pointer attribute. We use the smallest
9444 incoming stack boundary. */
9445 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9446 && lookup_attribute (ix86_force_align_arg_pointer_string,
9447 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9448 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9450 /* The incoming stack frame has to be aligned at least at
9451 parm_stack_boundary. */
9452 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9453 incoming_stack_boundary = crtl->parm_stack_boundary;
9455 /* The stack at the entrance of main is aligned by the runtime. We use
9456 the smallest incoming stack boundary. */
9457 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9458 && DECL_NAME (current_function_decl)
9459 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9460 && DECL_FILE_SCOPE_P (current_function_decl))
9461 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9463 return incoming_stack_boundary;
9466 /* Update incoming stack boundary and estimated stack alignment. */
9469 ix86_update_stack_boundary (void)
9471 ix86_incoming_stack_boundary
9472 = ix86_minimum_incoming_stack_boundary (false);
9474 /* x86_64 varargs need 16-byte stack alignment for the register save area. */
9478 && crtl->stack_alignment_estimated < 128)
9479 crtl->stack_alignment_estimated = 128;
9482 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9483 needed or an rtx for DRAP otherwise. */
9486 ix86_get_drap_rtx (void)
9488 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9489 crtl->need_drap = true;
9491 if (stack_realign_drap)
9493 /* Assign DRAP to vDRAP and return vDRAP. */
9494 unsigned int regno = find_drap_reg ();
9499 arg_ptr = gen_rtx_REG (Pmode, regno);
9500 crtl->drap_reg = arg_ptr;
9503 drap_vreg = copy_to_reg (arg_ptr);
9507 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9510 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9511 RTX_FRAME_RELATED_P (insn) = 1;
9519 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9522 ix86_internal_arg_pointer (void)
9524 return virtual_incoming_args_rtx;
9527 struct scratch_reg {
9532 /* Return a short-lived scratch register for use on function entry.
9533 In 32-bit mode, it is valid only after the registers are saved
9534 in the prologue. This register must be released by means of
9535 release_scratch_register_on_entry once it is dead. */
9538 get_scratch_register_on_entry (struct scratch_reg *sr)
9546 /* We always use R11 in 64-bit mode. */
9551 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9553 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9554 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9555 int regparm = ix86_function_regparm (fntype, decl);
9557 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9559 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9560 for the static chain register. */
9561 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9562 && drap_regno != AX_REG)
9564 else if (regparm < 2 && drap_regno != DX_REG)
9566 /* ecx is the static chain register. */
9567 else if (regparm < 3 && !fastcall_p && !static_chain_p
9568 && drap_regno != CX_REG)
9570 else if (ix86_save_reg (BX_REG, true))
9572 /* esi is the static chain register. */
9573 else if (!(regparm == 3 && static_chain_p)
9574 && ix86_save_reg (SI_REG, true))
9576 else if (ix86_save_reg (DI_REG, true))
9580 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9585 sr->reg = gen_rtx_REG (Pmode, regno);
9588 rtx insn = emit_insn (gen_push (sr->reg));
9589 RTX_FRAME_RELATED_P (insn) = 1;
9593 /* Release a scratch register obtained from the preceding function. */
9596 release_scratch_register_on_entry (struct scratch_reg *sr)
9600 rtx x, insn = emit_insn (gen_pop (sr->reg));
9602 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9603 RTX_FRAME_RELATED_P (insn) = 1;
9604 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9605 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9606 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9610 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
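/* With the default STACK_CHECK_PROBE_INTERVAL_EXP of 12, PROBE_INTERVAL
   is 4096 bytes, i.e. one probe per page; the worked examples below
   assume that value.  */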
9612 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9615 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9617 /* We skip the probe for the first interval + a small dope of 4 words and
9618 probe that many bytes past the specified size to maintain a protection
9619 area at the bottom of the stack. */
9620 const int dope = 4 * UNITS_PER_WORD;
9621 rtx size_rtx = GEN_INT (size), last;
9623 /* See if we have a constant small number of probes to generate. If so,
9624 that's the easy case. The run-time loop is made up of 11 insns in the
9625 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9626 for n # of intervals. */
9627 if (size <= 5 * PROBE_INTERVAL)
9629 HOST_WIDE_INT i, adjust;
9630 bool first_probe = true;
9632 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9633 values of N from 1 until it exceeds SIZE. If only one probe is
9634 needed, this will not generate any code. Then adjust and probe
9635 to PROBE_INTERVAL + SIZE. */
9636 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9640 adjust = 2 * PROBE_INTERVAL + dope;
9641 first_probe = false;
9644 adjust = PROBE_INTERVAL;
9646 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9647 plus_constant (stack_pointer_rtx, -adjust)));
9648 emit_stack_probe (stack_pointer_rtx);
9652 adjust = size + PROBE_INTERVAL + dope;
9654 adjust = size + PROBE_INTERVAL - i;
9656 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9657 plus_constant (stack_pointer_rtx, -adjust)));
9658 emit_stack_probe (stack_pointer_rtx);
9660 /* Adjust back to account for the additional first interval. */
9661 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9662 plus_constant (stack_pointer_rtx,
9663 PROBE_INTERVAL + dope)));
9666 /* Otherwise, do the same as above, but in a loop. Note that we must be
9667 extra careful with variables wrapping around because we might be at
9668 the very top (or the very bottom) of the address space and we have
9669 to be able to handle this case properly; in particular, we use an
9670 equality test for the loop condition. */
9673 HOST_WIDE_INT rounded_size;
9674 struct scratch_reg sr;
9676 get_scratch_register_on_entry (&sr);
9679 /* Step 1: round SIZE to the previous multiple of the interval. */
9681 rounded_size = size & -PROBE_INTERVAL;
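/* For example, with PROBE_INTERVAL == 4096, a SIZE of 10000 gives
   rounded_size == 8192; the remaining 1808 bytes are handled by the
   single adjust-and-probe in step 4 below.  */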
9684 /* Step 2: compute initial and final value of the loop counter. */
9686 /* SP = SP_0 + PROBE_INTERVAL. */
9687 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9688 plus_constant (stack_pointer_rtx,
9689 - (PROBE_INTERVAL + dope))));
9691 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9692 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9693 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9694 gen_rtx_PLUS (Pmode, sr.reg,
9695 stack_pointer_rtx)));
9700 while (SP != LAST_ADDR)
9702 SP = SP + PROBE_INTERVAL
9706 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9707 values of N from 1 until it is equal to ROUNDED_SIZE. */
9709 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9712 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9713 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9715 if (size != rounded_size)
9717 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9718 plus_constant (stack_pointer_rtx,
9719 rounded_size - size)));
9720 emit_stack_probe (stack_pointer_rtx);
9723 /* Adjust back to account for the additional first interval. */
9724 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9725 plus_constant (stack_pointer_rtx,
9726 PROBE_INTERVAL + dope)));
9728 release_scratch_register_on_entry (&sr);
9731 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9733 /* Even if the stack pointer isn't the CFA register, we need to correctly
9734 describe the adjustments made to it, in particular differentiate the
9735 frame-related ones from the frame-unrelated ones. */
9738 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9739 XVECEXP (expr, 0, 0)
9740 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9741 plus_constant (stack_pointer_rtx, -size));
9742 XVECEXP (expr, 0, 1)
9743 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9744 plus_constant (stack_pointer_rtx,
9745 PROBE_INTERVAL + dope + size));
9746 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9747 RTX_FRAME_RELATED_P (last) = 1;
9749 cfun->machine->fs.sp_offset += size;
9752 /* Make sure nothing is scheduled before we are done. */
9753 emit_insn (gen_blockage ());
9756 /* Adjust the stack pointer up to REG while probing it. */
9759 output_adjust_stack_and_probe (rtx reg)
9761 static int labelno = 0;
9762 char loop_lab[32], end_lab[32];
9765 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9766 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9768 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9770 /* Jump to END_LAB if SP == LAST_ADDR. */
9771 xops[0] = stack_pointer_rtx;
9773 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9774 fputs ("\tje\t", asm_out_file);
9775 assemble_name_raw (asm_out_file, end_lab);
9776 fputc ('\n', asm_out_file);
9778 /* SP = SP + PROBE_INTERVAL. */
9779 xops[1] = GEN_INT (PROBE_INTERVAL);
9780 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9783 xops[1] = const0_rtx;
9784 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9786 fprintf (asm_out_file, "\tjmp\t");
9787 assemble_name_raw (asm_out_file, loop_lab);
9788 fputc ('\n', asm_out_file);
9790 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
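/* Altogether this emits a compact probing loop of the form (32-bit
   AT&T syntax, illustrative, assuming a 4096-byte PROBE_INTERVAL and
   scratch register %eax):

        .LPSRL0: cmpl    %eax, %esp
                 je      .LPSRE0
                 subl    $4096, %esp
                 orl     $0, (%esp)
                 jmp     .LPSRL0
        .LPSRE0:

   where the "orl $0" touches the newly exposed page without changing
   its contents.  */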
9795 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9796 inclusive. These are offsets from the current stack pointer. */
9799 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9801 /* See if we have a constant small number of probes to generate. If so,
9802 that's the easy case. The run-time loop is made up of 7 insns in the
9803 generic case while the compile-time loop is made up of n insns for n # of intervals. */
9805 if (size <= 7 * PROBE_INTERVAL)
9809 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9810 it exceeds SIZE. If only one probe is needed, this will not
9811 generate any code. Then probe at FIRST + SIZE. */
9812 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9813 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9815 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
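/* For instance, FIRST == 4096 and SIZE == 8192 emit exactly two
   probes, at sp - 8192 and sp - 12288: the loop stops once I reaches
   SIZE, and the final probe covers FIRST + SIZE.  */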
9818 /* Otherwise, do the same as above, but in a loop. Note that we must be
9819 extra careful with variables wrapping around because we might be at
9820 the very top (or the very bottom) of the address space and we have
9821 to be able to handle this case properly; in particular, we use an
9822 equality test for the loop condition. */
9825 HOST_WIDE_INT rounded_size, last;
9826 struct scratch_reg sr;
9828 get_scratch_register_on_entry (&sr);
9831 /* Step 1: round SIZE to the previous multiple of the interval. */
9833 rounded_size = size & -PROBE_INTERVAL;
9836 /* Step 2: compute initial and final value of the loop counter. */
9838 /* TEST_OFFSET = FIRST. */
9839 emit_move_insn (sr.reg, GEN_INT (-first));
9841 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9842 last = first + rounded_size;
9847 while (TEST_ADDR != LAST_ADDR)
9849 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9853 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9854 until it is equal to ROUNDED_SIZE. */
9856 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9859 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9860 that SIZE is equal to ROUNDED_SIZE. */
9862 if (size != rounded_size)
9863 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9866 rounded_size - size));
9868 release_scratch_register_on_entry (&sr);
9871 /* Make sure nothing is scheduled before we are done. */
9872 emit_insn (gen_blockage ());
9875 /* Probe a range of stack addresses from REG to END, inclusive. These are
9876 offsets from the current stack pointer. */
9879 output_probe_stack_range (rtx reg, rtx end)
9881 static int labelno = 0;
9882 char loop_lab[32], end_lab[32];
9885 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9886 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9888 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9890 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9893 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9894 fputs ("\tje\t", asm_out_file);
9895 assemble_name_raw (asm_out_file, end_lab);
9896 fputc ('\n', asm_out_file);
9898 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9899 xops[1] = GEN_INT (PROBE_INTERVAL);
9900 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9902 /* Probe at TEST_ADDR. */
9903 xops[0] = stack_pointer_rtx;
9905 xops[2] = const0_rtx;
9906 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9908 fprintf (asm_out_file, "\tjmp\t");
9909 assemble_name_raw (asm_out_file, loop_lab);
9910 fputc ('\n', asm_out_file);
9912 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9917 /* Finalize the stack_realign_needed flag, which guides the prologue/epilogue
9918 so that they are generated in the correct form. */
9920 ix86_finalize_stack_realign_flags (void)
9922 /* Check if stack realign is really needed after reload, and store
9923 the result in cfun. */
9924 unsigned int incoming_stack_boundary
9925 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9926 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9927 unsigned int stack_realign = (incoming_stack_boundary
9928 < (current_function_is_leaf
9929 ? crtl->max_used_stack_slot_alignment
9930 : crtl->stack_alignment_needed));
9932 if (crtl->stack_realign_finalized)
9934 /* After stack_realign_needed is finalized, we can no longer vary it. */
9936 gcc_assert (crtl->stack_realign_needed == stack_realign);
9940 /* If the only reason for frame_pointer_needed is that we conservatively
9941 assumed stack realignment might be needed, but in the end nothing that
9942 needed the stack alignment had been spilled, clear frame_pointer_needed
9943 and say we don't need stack realignment. */
9946 && frame_pointer_needed
9947 && current_function_is_leaf
9948 && flag_omit_frame_pointer
9949 && current_function_sp_is_unchanging
9950 && !ix86_current_function_calls_tls_descriptor
9951 && !crtl->accesses_prior_frames
9952 && !cfun->calls_alloca
9953 && !crtl->calls_eh_return
9954 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
9955 && !ix86_frame_pointer_required ()
9956 && get_frame_size () == 0
9957 && ix86_nsaved_sseregs () == 0
9958 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
9960 HARD_REG_SET set_up_by_prologue, prologue_used;
9963 CLEAR_HARD_REG_SET (prologue_used);
9964 CLEAR_HARD_REG_SET (set_up_by_prologue);
9965 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
9966 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
9967 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
9968 HARD_FRAME_POINTER_REGNUM);
9972 FOR_BB_INSNS (bb, insn)
9973 if (NONDEBUG_INSN_P (insn)
9974 && requires_stack_frame_p (insn, prologue_used,
9975 set_up_by_prologue))
9977 crtl->stack_realign_needed = stack_realign;
9978 crtl->stack_realign_finalized = true;
9983 frame_pointer_needed = false;
9984 stack_realign = false;
9985 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
9986 crtl->stack_alignment_needed = incoming_stack_boundary;
9987 crtl->stack_alignment_estimated = incoming_stack_boundary;
9988 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
9989 crtl->preferred_stack_boundary = incoming_stack_boundary;
9990 df_finish_pass (true);
9991 df_scan_alloc (NULL);
9993 df_compute_regs_ever_live (true);
9997 crtl->stack_realign_needed = stack_realign;
9998 crtl->stack_realign_finalized = true;
10001 /* Expand the prologue into a bunch of separate insns. */
10004 ix86_expand_prologue (void)
10006 struct machine_function *m = cfun->machine;
10009 struct ix86_frame frame;
10010 HOST_WIDE_INT allocate;
10011 bool int_registers_saved;
10012 bool sse_registers_saved;
10014 ix86_finalize_stack_realign_flags ();
10016 /* DRAP should not coexist with stack_realign_fp */
10017 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10019 memset (&m->fs, 0, sizeof (m->fs));
10021 /* Initialize CFA state for before the prologue. */
10022 m->fs.cfa_reg = stack_pointer_rtx;
10023 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10025 /* Track SP offset to the CFA. We continue tracking this after we've
10026 swapped the CFA register away from SP. In the case of re-alignment
10027 this is fudged; we're interested in offsets within the local frame. */
10028 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10029 m->fs.sp_valid = true;
10031 ix86_compute_frame_layout (&frame);
10033 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10035 /* We should have already generated an error for any use of
10036 ms_hook on a nested function. */
10037 gcc_checking_assert (!ix86_static_chain_on_stack);
10039 /* Check if profiling is active and we shall use the profiling-before-
10040 prologue variant. If so, issue a sorry. */
10041 if (crtl->profile && flag_fentry != 0)
10042 sorry ("ms_hook_prologue attribute isn%'t compatible "
10043 "with -mfentry for 32-bit");
10045 /* In ix86_asm_output_function_label we emitted:
10046 8b ff movl.s %edi,%edi
10048 8b ec movl.s %esp,%ebp
10050 This matches the hookable function prologue in Win32 API
10051 functions in Microsoft Windows XP Service Pack 2 and newer.
10052 Wine uses this to enable Windows apps to hook the Win32 API
10053 functions provided by Wine.
10055 What that means is that we've already set up the frame pointer. */
10057 if (frame_pointer_needed
10058 && !(crtl->drap_reg && crtl->stack_realign_needed))
10062 /* We've decided to use the frame pointer already set up.
10063 Describe this to the unwinder by pretending that both
10064 push and mov insns happen right here.
10066 Putting the unwind info here at the end of the ms_hook
10067 is done so that we can make absolutely certain we get
10068 the required byte sequence at the start of the function,
10069 rather than relying on an assembler that can produce
10070 the exact encoding required.
10072 However it does mean (in the unpatched case) that we have
10073 a 1 insn window where the asynchronous unwind info is
10074 incorrect. However, if we placed the unwind info at
10075 its correct location we would have incorrect unwind info
10076 in the patched case. This is probably all moot since
10077 I don't expect Wine to generate dwarf2 unwind info for the
10078 system libraries that use this feature. */
10080 insn = emit_insn (gen_blockage ());
10082 push = gen_push (hard_frame_pointer_rtx);
10083 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10084 stack_pointer_rtx);
10085 RTX_FRAME_RELATED_P (push) = 1;
10086 RTX_FRAME_RELATED_P (mov) = 1;
10088 RTX_FRAME_RELATED_P (insn) = 1;
10089 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10090 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10092 /* Note that gen_push incremented m->fs.cfa_offset, even
10093 though we didn't emit the push insn here. */
10094 m->fs.cfa_reg = hard_frame_pointer_rtx;
10095 m->fs.fp_offset = m->fs.cfa_offset;
10096 m->fs.fp_valid = true;
10100 /* The frame pointer is not needed, so pop %ebp again.
10101 This leaves us with a pristine state. */
10102 emit_insn (gen_pop (hard_frame_pointer_rtx));
10106 /* The first insn of a function that accepts its static chain on the
10107 stack is to push the register that would be filled in by a direct
10108 call. This insn will be skipped by the trampoline. */
10109 else if (ix86_static_chain_on_stack)
10111 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10112 emit_insn (gen_blockage ());
10114 /* We don't want to interpret this push insn as a register save,
10115 only as a stack adjustment. The real copy of the register as
10116 a save will be done later, if needed. */
10117 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10118 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10119 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10120 RTX_FRAME_RELATED_P (insn) = 1;
10123 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10124 DRAP is needed and stack realignment is really needed after reload. */
10125 if (stack_realign_drap)
10127 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10129 /* Only need to push parameter pointer reg if it is caller saved. */
10130 if (!call_used_regs[REGNO (crtl->drap_reg)])
10132 /* Push arg pointer reg */
10133 insn = emit_insn (gen_push (crtl->drap_reg));
10134 RTX_FRAME_RELATED_P (insn) = 1;
10137 /* Grab the argument pointer. */
10138 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10139 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10140 RTX_FRAME_RELATED_P (insn) = 1;
10141 m->fs.cfa_reg = crtl->drap_reg;
10142 m->fs.cfa_offset = 0;
10144 /* Align the stack. */
10145 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10147 GEN_INT (-align_bytes)));
10148 RTX_FRAME_RELATED_P (insn) = 1;
10150 /* Replicate the return address on the stack so that the return
10151 address can be reached via the (argp - 1) slot. This is needed
10152 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10153 expand_builtin_return_addr, etc. */
10154 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10155 t = gen_frame_mem (Pmode, t);
10156 insn = emit_insn (gen_push (t));
10157 RTX_FRAME_RELATED_P (insn) = 1;
10159 /* For the purposes of frame and register save area addressing,
10160 we've started over with a new frame. */
10161 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10162 m->fs.realigned = true;
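/* As an illustration (assuming the DRAP register is %ecx, a typical
   choice on ia32, and a 16-byte alignment), the code above emits
   roughly:

       pushl %ecx              ; only if the DRAP reg must be preserved
       leal  4(%esp), %ecx     ; grab the argument pointer
       andl  $-16, %esp        ; align the stack
       pushl -4(%ecx)          ; replicate the return address

   The actual register and alignment are chosen elsewhere.  */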
10165 int_registers_saved = (frame.nregs == 0);
10166 sse_registers_saved = (frame.nsseregs == 0);
10168 if (frame_pointer_needed && !m->fs.fp_valid)
10170 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10171 slower on all targets. Also sdb doesn't like it. */
10172 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10173 RTX_FRAME_RELATED_P (insn) = 1;
10175 /* Push registers now, before setting the frame pointer
10176 on SEH target. */
10177 if (!int_registers_saved
10178 && TARGET_SEH
10179 && !frame.save_regs_using_mov)
10181 ix86_emit_save_regs ();
10182 int_registers_saved = true;
10183 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10186 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10188 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10189 RTX_FRAME_RELATED_P (insn) = 1;
10191 if (m->fs.cfa_reg == stack_pointer_rtx)
10192 m->fs.cfa_reg = hard_frame_pointer_rtx;
10193 m->fs.fp_offset = m->fs.sp_offset;
10194 m->fs.fp_valid = true;
10198 if (!int_registers_saved)
10200 /* If saving registers via PUSH, do so now. */
10201 if (!frame.save_regs_using_mov)
10203 ix86_emit_save_regs ();
10204 int_registers_saved = true;
10205 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10208 /* When using red zone we may start register saving before allocating
10209 the stack frame, saving one cycle of the prologue. However, avoid
10210 doing this if we have to probe the stack; at least on x86_64 the
10211 stack probe can turn into a call that clobbers a red zone location. */
10212 else if (ix86_using_red_zone ()
10213 && (! TARGET_STACK_PROBE
10214 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10216 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10217 int_registers_saved = true;
10221 if (stack_realign_fp)
10223 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10224 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10226 /* The computation of the size of the re-aligned stack frame means
10227 that we must allocate the size of the register save area before
10228 performing the actual alignment. Otherwise we cannot guarantee
10229 that there's enough storage above the realignment point. */
10230 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10231 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10232 GEN_INT (m->fs.sp_offset
10233 - frame.sse_reg_save_offset),
10236 /* Align the stack. */
10237 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10239 GEN_INT (-align_bytes)));
10241 /* For the purposes of register save area addressing, the stack
10242 pointer is no longer valid. As for the value of sp_offset,
10243 see ix86_compute_frame_layout, which we need to match in order
10244 to pass verification of stack_pointer_offset at the end. */
10245 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10246 m->fs.sp_valid = false;
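/* For example, with align_bytes == 16 and an incoming sp_offset of 12,
   the new sp_offset is (12 + 16) & -16 == 16; an sp_offset of 16 would
   become 32.  This mirrors the rounding in ix86_compute_frame_layout
   mentioned above.  */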
10249 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10251 if (flag_stack_usage_info)
10253 /* We start to count from ARG_POINTER. */
10254 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10256 /* If it was realigned, take into account the fake frame. */
10257 if (stack_realign_drap)
10259 if (ix86_static_chain_on_stack)
10260 stack_size += UNITS_PER_WORD;
10262 if (!call_used_regs[REGNO (crtl->drap_reg)])
10263 stack_size += UNITS_PER_WORD;
10265 /* This over-estimates by 1 minimal-stack-alignment-unit but
10266 mitigates that by counting in the new return address slot. */
10267 current_function_dynamic_stack_size
10268 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10271 current_function_static_stack_size = stack_size;
10274 /* On SEH target with very large frame size, allocate an area to save
10275 SSE registers (as the very large allocation won't be described). */
10276 if (TARGET_SEH
10277 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10278 && !sse_registers_saved)
10280 HOST_WIDE_INT sse_size =
10281 frame.sse_reg_save_offset - frame.reg_save_offset;
10283 gcc_assert (int_registers_saved);
10285 /* No need to do stack checking as the area will be immediately
10286 deallocated. */
10287 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10288 GEN_INT (-sse_size), -1,
10289 m->fs.cfa_reg == stack_pointer_rtx);
10290 allocate -= sse_size;
10291 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10292 sse_registers_saved = true;
10295 /* The stack has already been decremented by the instruction calling us,
10296 so probe if the size is non-negative to preserve the protection area. */
10297 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10299 /* We expect the registers to be saved when probes are used. */
10300 gcc_assert (int_registers_saved);
10302 if (STACK_CHECK_MOVING_SP)
10304 ix86_adjust_stack_and_probe (allocate);
10309 HOST_WIDE_INT size = allocate;
10311 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10312 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
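/* The clamp keeps size + STACK_CHECK_PROTECT strictly below 2**31,
   presumably so that the probe displacements computed below stay
   representable as 32-bit immediates.  */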
10314 if (TARGET_STACK_PROBE)
10315 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10317 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10323 else if (!ix86_target_stack_probe ()
10324 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10326 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10327 GEN_INT (-allocate), -1,
10328 m->fs.cfa_reg == stack_pointer_rtx);
10332 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10334 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10336 bool eax_live = false;
10337 bool r10_live = false;
10340 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10341 if (!TARGET_64BIT_MS_ABI)
10342 eax_live = ix86_eax_live_at_start_p ();
10346 emit_insn (gen_push (eax));
10347 allocate -= UNITS_PER_WORD;
10351 r10 = gen_rtx_REG (Pmode, R10_REG);
10352 emit_insn (gen_push (r10));
10353 allocate -= UNITS_PER_WORD;
10356 emit_move_insn (eax, GEN_INT (allocate));
10357 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10359 /* Use the fact that AX still contains ALLOCATE. */
10360 adjust_stack_insn = (TARGET_64BIT
10361 ? gen_pro_epilogue_adjust_stack_di_sub
10362 : gen_pro_epilogue_adjust_stack_si_sub);
10364 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10365 stack_pointer_rtx, eax));
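/* On Windows-style targets the two insns above expand to roughly

       movl  $ALLOCATE, %eax
       call  ___chkstk_ms       ; probe the pages, %eax preserved
       subl  %eax, %esp         ; the adjustment emitted just above

   (a sketch; the exact worker symbol depends on the target).  */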
10367 /* Note that SEH directives need to continue tracking the stack
10368 pointer even after the frame pointer has been set up. */
10369 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10371 if (m->fs.cfa_reg == stack_pointer_rtx)
10372 m->fs.cfa_offset += allocate;
10374 RTX_FRAME_RELATED_P (insn) = 1;
10375 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10376 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10377 plus_constant (stack_pointer_rtx,
10380 m->fs.sp_offset += allocate;
10382 if (r10_live && eax_live)
10384 t = choose_baseaddr (m->fs.sp_offset - allocate);
10385 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10386 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10387 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10389 else if (eax_live || r10_live)
10391 t = choose_baseaddr (m->fs.sp_offset - allocate);
10392 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10395 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10397 /* If we haven't already set up the frame pointer, do so now. */
10398 if (frame_pointer_needed && !m->fs.fp_valid)
10400 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10401 GEN_INT (frame.stack_pointer_offset
10402 - frame.hard_frame_pointer_offset));
10403 insn = emit_insn (insn);
10404 RTX_FRAME_RELATED_P (insn) = 1;
10405 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10407 if (m->fs.cfa_reg == stack_pointer_rtx)
10408 m->fs.cfa_reg = hard_frame_pointer_rtx;
10409 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10410 m->fs.fp_valid = true;
10413 if (!int_registers_saved)
10414 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10415 if (!sse_registers_saved)
10416 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10418 pic_reg_used = false;
10419 if (pic_offset_table_rtx
10420 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10423 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10425 if (alt_pic_reg_used != INVALID_REGNUM)
10426 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10428 pic_reg_used = true;
10435 if (ix86_cmodel == CM_LARGE_PIC)
10437 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10438 rtx label = gen_label_rtx ();
10439 emit_label (label);
10440 LABEL_PRESERVE_P (label) = 1;
10441 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10442 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10443 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10444 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10445 pic_offset_table_rtx, tmp_reg));
10448 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10452 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10453 RTX_FRAME_RELATED_P (insn) = 1;
10454 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10458 /* In the pic_reg_used case, make sure that the got load isn't deleted
10459 when mcount needs it. A blockage to avoid call movement across the
10460 mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10461 note. */
10462 if (crtl->profile && !flag_fentry && pic_reg_used)
10463 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10465 if (crtl->drap_reg && !crtl->stack_realign_needed)
10467 /* vDRAP is set up, but after reload it turns out stack realignment
10468 isn't necessary; here we emit the prologue to set up DRAP
10469 without the stack realign adjustment. */
10470 t = choose_baseaddr (0);
10471 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10474 /* Prevent instructions from being scheduled into register save push
10475 sequence when access to the redzone area is done through frame pointer.
10476 The offset between the frame pointer and the stack pointer is calculated
10477 relative to the value of the stack pointer at the end of the function
10478 prologue, and moving instructions that access redzone area via frame
10479 pointer inside push sequence violates this assumption. */
10480 if (frame_pointer_needed && frame.red_zone_size)
10481 emit_insn (gen_memory_blockage ());
10483 /* Emit cld instruction if stringops are used in the function. */
10484 if (TARGET_CLD && ix86_current_function_needs_cld)
10485 emit_insn (gen_cld ());
10487 /* SEH requires that the prologue end within 256 bytes of the start of
10488 the function. Prevent instruction schedules that would extend that.
10489 Further, prevent alloca modifications to the stack pointer from being
10490 combined with prologue modifications. */
10492 emit_insn (gen_prologue_use (stack_pointer_rtx));
10495 /* Emit code to restore REG using a POP insn. */
10498 ix86_emit_restore_reg_using_pop (rtx reg)
10500 struct machine_function *m = cfun->machine;
10501 rtx insn = emit_insn (gen_pop (reg));
10503 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10504 m->fs.sp_offset -= UNITS_PER_WORD;
10506 if (m->fs.cfa_reg == crtl->drap_reg
10507 && REGNO (reg) == REGNO (crtl->drap_reg))
10509 /* Previously we'd represented the CFA as an expression
10510 like *(%ebp - 8). We've just popped that value from
10511 the stack, which means we need to reset the CFA to
10512 the drap register. This will remain until we restore
10513 the stack pointer. */
10514 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10515 RTX_FRAME_RELATED_P (insn) = 1;
10517 /* This means that the DRAP register is valid for addressing too. */
10518 m->fs.drap_valid = true;
10522 if (m->fs.cfa_reg == stack_pointer_rtx)
10524 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10525 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10526 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10527 RTX_FRAME_RELATED_P (insn) = 1;
10529 m->fs.cfa_offset -= UNITS_PER_WORD;
10532 /* When the frame pointer is the CFA, and we pop it, we are
10533 swapping back to the stack pointer as the CFA. This happens
10534 for stack frames that don't allocate other data, so we assume
10535 the stack pointer is now pointing at the return address, i.e.
10536 the function entry state, which makes the offset one word. */
10537 if (reg == hard_frame_pointer_rtx)
10539 m->fs.fp_valid = false;
10540 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10542 m->fs.cfa_reg = stack_pointer_rtx;
10543 m->fs.cfa_offset -= UNITS_PER_WORD;
10545 add_reg_note (insn, REG_CFA_DEF_CFA,
10546 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10547 GEN_INT (m->fs.cfa_offset)));
10548 RTX_FRAME_RELATED_P (insn) = 1;
10553 /* Emit code to restore saved registers using POP insns. */
10556 ix86_emit_restore_regs_using_pop (void)
10558 unsigned int regno;
10560 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10561 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10562 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10565 /* Emit code and notes for the LEAVE instruction. */
10568 ix86_emit_leave (void)
10570 struct machine_function *m = cfun->machine;
10571 rtx insn = emit_insn (ix86_gen_leave ());
10573 ix86_add_queued_cfa_restore_notes (insn);
10575 gcc_assert (m->fs.fp_valid);
10576 m->fs.sp_valid = true;
10577 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10578 m->fs.fp_valid = false;
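/* "leave" is equivalent to "movl %ebp, %esp; popl %ebp", so afterward
   the stack pointer sits one word above where the frame pointer
   pointed, which is why sp_offset becomes fp_offset - UNITS_PER_WORD.  */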
10580 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10582 m->fs.cfa_reg = stack_pointer_rtx;
10583 m->fs.cfa_offset = m->fs.sp_offset;
10585 add_reg_note (insn, REG_CFA_DEF_CFA,
10586 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10587 RTX_FRAME_RELATED_P (insn) = 1;
10589 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10590 m->fs.fp_offset);
10593 /* Emit code to restore saved registers using MOV insns.
10594 First register is restored from CFA - CFA_OFFSET. */
10596 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10597 bool maybe_eh_return)
10599 struct machine_function *m = cfun->machine;
10600 unsigned int regno;
10602 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10603 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10605 rtx reg = gen_rtx_REG (Pmode, regno);
10608 mem = choose_baseaddr (cfa_offset);
10609 mem = gen_frame_mem (Pmode, mem);
10610 insn = emit_move_insn (reg, mem);
10612 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10614 /* Previously we'd represented the CFA as an expression
10615 like *(%ebp - 8). We've just popped that value from
10616 the stack, which means we need to reset the CFA to
10617 the drap register. This will remain until we restore
10618 the stack pointer. */
10619 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10620 RTX_FRAME_RELATED_P (insn) = 1;
10622 /* This means that the DRAP register is valid for addressing. */
10623 m->fs.drap_valid = true;
10626 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10628 cfa_offset -= UNITS_PER_WORD;
10632 /* Emit code to restore saved SSE registers using MOV insns.
10633 First register is restored from CFA - CFA_OFFSET. */
10635 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10636 bool maybe_eh_return)
10638 unsigned int regno;
10640 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10641 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10643 rtx reg = gen_rtx_REG (V4SFmode, regno);
10646 mem = choose_baseaddr (cfa_offset);
10647 mem = gen_rtx_MEM (V4SFmode, mem);
10648 set_mem_align (mem, 128);
10649 emit_move_insn (reg, mem);
10651 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10657 /* Emit vzeroupper if needed. */
10660 ix86_maybe_emit_epilogue_vzeroupper (void)
10662 if (TARGET_VZEROUPPER
10663 && !TREE_THIS_VOLATILE (cfun->decl)
10664 && !cfun->machine->caller_return_avx256_p)
10665 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10668 /* Restore function stack, frame, and registers. */
10671 ix86_expand_epilogue (int style)
10673 struct machine_function *m = cfun->machine;
10674 struct machine_frame_state frame_state_save = m->fs;
10675 struct ix86_frame frame;
10676 bool restore_regs_via_mov;
10679 ix86_finalize_stack_realign_flags ();
10680 ix86_compute_frame_layout (&frame);
10682 m->fs.sp_valid = (!frame_pointer_needed
10683 || (current_function_sp_is_unchanging
10684 && !stack_realign_fp));
10685 gcc_assert (!m->fs.sp_valid
10686 || m->fs.sp_offset == frame.stack_pointer_offset);
10688 /* The FP must be valid if the frame pointer is present. */
10689 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10690 gcc_assert (!m->fs.fp_valid
10691 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10693 /* We must have *some* valid pointer to the stack frame. */
10694 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10696 /* The DRAP is never valid at this point. */
10697 gcc_assert (!m->fs.drap_valid);
10699 /* See the comment about red zone and frame
10700 pointer usage in ix86_expand_prologue. */
10701 if (frame_pointer_needed && frame.red_zone_size)
10702 emit_insn (gen_memory_blockage ());
10704 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10705 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10707 /* Determine the CFA offset of the end of the red-zone. */
10708 m->fs.red_zone_offset = 0;
10709 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10711 /* The red-zone begins below the return address. */
10712 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10714 /* When the register save area is in the aligned portion of
10715 the stack, determine the maximum runtime displacement that
10716 matches up with the aligned frame. */
10717 if (stack_realign_drap)
10718 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10722 /* Special care must be taken for the normal return case of a function
10723 using eh_return: the eax and edx registers are marked as saved, but
10724 not restored along this path. Adjust the save location to match. */
10725 if (crtl->calls_eh_return && style != 2)
10726 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10728 /* EH_RETURN requires the use of moves to function properly. */
10729 if (crtl->calls_eh_return)
10730 restore_regs_via_mov = true;
10731 /* SEH requires the use of pops to identify the epilogue. */
10732 else if (TARGET_SEH)
10733 restore_regs_via_mov = false;
10734 /* If we're only restoring one register and sp is not valid, then
10735 use a move instruction to restore the register, since it's
10736 less work than reloading sp and popping the register. */
10737 else if (!m->fs.sp_valid && frame.nregs <= 1)
10738 restore_regs_via_mov = true;
10739 else if (TARGET_EPILOGUE_USING_MOVE
10740 && cfun->machine->use_fast_prologue_epilogue
10741 && (frame.nregs > 1
10742 || m->fs.sp_offset != frame.reg_save_offset))
10743 restore_regs_via_mov = true;
10744 else if (frame_pointer_needed
10745 && !frame.nregs
10746 && m->fs.sp_offset != frame.reg_save_offset)
10747 restore_regs_via_mov = true;
10748 else if (frame_pointer_needed
10749 && TARGET_USE_LEAVE
10750 && cfun->machine->use_fast_prologue_epilogue
10751 && frame.nregs == 1)
10752 restore_regs_via_mov = true;
10753 else
10754 restore_regs_via_mov = false;
10756 if (restore_regs_via_mov || frame.nsseregs)
10758 /* Ensure that the entire register save area is addressable via
10759 the stack pointer, if we will restore via sp. */
10760 if (TARGET_64BIT
10761 && m->fs.sp_offset > 0x7fffffff
10762 && !(m->fs.fp_valid || m->fs.drap_valid)
10763 && (frame.nsseregs + frame.nregs) != 0)
10765 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10766 GEN_INT (m->fs.sp_offset
10767 - frame.sse_reg_save_offset),
10769 m->fs.cfa_reg == stack_pointer_rtx);
10773 /* If there are any SSE registers to restore, then we have to do it
10774 via moves, since there's obviously no pop for SSE regs. */
10775 if (frame.nsseregs)
10776 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10777 style == 2);
10779 if (restore_regs_via_mov)
10784 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10786 /* eh_return epilogues need %ecx added to the stack pointer. */
10787 if (style == 2)
10789 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10791 /* Stack align doesn't work with eh_return. */
10792 gcc_assert (!stack_realign_drap);
10793 /* Neither do regparm nested functions. */
10794 gcc_assert (!ix86_static_chain_on_stack);
10796 if (frame_pointer_needed)
10798 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10799 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10800 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10802 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10803 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10805 /* Note that we use SA as a temporary CFA, as the return
10806 address is at the proper place relative to it. We
10807 pretend this happens at the FP restore insn because
10808 prior to this insn the FP would be stored at the wrong
10809 offset relative to SA, and after this insn we have no
10810 other reasonable register to use for the CFA. We don't
10811 bother resetting the CFA to the SP for the duration of
10812 the return insn. */
10813 add_reg_note (insn, REG_CFA_DEF_CFA,
10814 plus_constant (sa, UNITS_PER_WORD));
10815 ix86_add_queued_cfa_restore_notes (insn);
10816 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10817 RTX_FRAME_RELATED_P (insn) = 1;
10819 m->fs.cfa_reg = sa;
10820 m->fs.cfa_offset = UNITS_PER_WORD;
10821 m->fs.fp_valid = false;
10823 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10824 const0_rtx, style, false);
10828 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10829 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10830 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10831 ix86_add_queued_cfa_restore_notes (insn);
10833 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10834 if (m->fs.cfa_offset != UNITS_PER_WORD)
10836 m->fs.cfa_offset = UNITS_PER_WORD;
10837 add_reg_note (insn, REG_CFA_DEF_CFA,
10838 plus_constant (stack_pointer_rtx,
10840 RTX_FRAME_RELATED_P (insn) = 1;
10843 m->fs.sp_offset = UNITS_PER_WORD;
10844 m->fs.sp_valid = true;
10849 /* SEH requires that the function end with (1) a stack adjustment
10850 if necessary, (2) a sequence of pops, and (3) a return or
10851 jump instruction. Prevent insns from the function body from
10852 being scheduled into this sequence. */
10855 /* Prevent a catch region from being adjacent to the standard
10856 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda
10857 nor several other flags that would be interesting to test are
10858 set up yet. */
10859 if (flag_non_call_exceptions)
10860 emit_insn (gen_nops (const1_rtx));
10862 emit_insn (gen_blockage ());
10865 /* First step is to deallocate the stack frame so that we can
10866 pop the registers. Also do it on SEH target for very large
10867 frame as the emitted instructions aren't allowed by the ABI in
10868 epilogues. */
10869 if (!m->fs.sp_valid
10870 || (TARGET_SEH
10871 && (m->fs.sp_offset - frame.reg_save_offset
10872 >= SEH_MAX_FRAME_SIZE)))
10874 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10875 GEN_INT (m->fs.fp_offset
10876 - frame.reg_save_offset),
10879 else if (m->fs.sp_offset != frame.reg_save_offset)
10881 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10882 GEN_INT (m->fs.sp_offset
10883 - frame.reg_save_offset),
10885 m->fs.cfa_reg == stack_pointer_rtx);
10888 ix86_emit_restore_regs_using_pop ();
10891 /* If we used a frame pointer and haven't already got rid of it,
10892 then do so now. */
10893 if (m->fs.fp_valid)
10895 /* If the stack pointer is valid and pointing at the frame
10896 pointer store address, then we only need a pop. */
10897 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10898 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10899 /* Leave results in shorter dependency chains on CPUs that are
10900 able to grok it fast. */
10901 else if (TARGET_USE_LEAVE
10902 || optimize_function_for_size_p (cfun)
10903 || !cfun->machine->use_fast_prologue_epilogue)
10904 ix86_emit_leave ();
10907 pro_epilogue_adjust_stack (stack_pointer_rtx,
10908 hard_frame_pointer_rtx,
10909 const0_rtx, style, !using_drap);
10910 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10914 if (using_drap)
10916 int param_ptr_offset = UNITS_PER_WORD;
10919 gcc_assert (stack_realign_drap);
10921 if (ix86_static_chain_on_stack)
10922 param_ptr_offset += UNITS_PER_WORD;
10923 if (!call_used_regs[REGNO (crtl->drap_reg)])
10924 param_ptr_offset += UNITS_PER_WORD;
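/* E.g. on ia32 with a static chain passed on the stack and a
   callee-saved DRAP register, param_ptr_offset is 3 * UNITS_PER_WORD:
   the return address plus those two extra slots.  */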
10926 insn = emit_insn (gen_rtx_SET
10927 (VOIDmode, stack_pointer_rtx,
10928 gen_rtx_PLUS (Pmode,
10930 GEN_INT (-param_ptr_offset))));
10931 m->fs.cfa_reg = stack_pointer_rtx;
10932 m->fs.cfa_offset = param_ptr_offset;
10933 m->fs.sp_offset = param_ptr_offset;
10934 m->fs.realigned = false;
10936 add_reg_note (insn, REG_CFA_DEF_CFA,
10937 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10938 GEN_INT (param_ptr_offset)));
10939 RTX_FRAME_RELATED_P (insn) = 1;
10941 if (!call_used_regs[REGNO (crtl->drap_reg)])
10942 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10945 /* At this point the stack pointer must be valid, and we must have
10946 restored all of the registers. We may not have deallocated the
10947 entire stack frame. We've delayed this until now because it may
10948 be possible to merge the local stack deallocation with the
10949 deallocation forced by ix86_static_chain_on_stack. */
10950 gcc_assert (m->fs.sp_valid);
10951 gcc_assert (!m->fs.fp_valid);
10952 gcc_assert (!m->fs.realigned);
10953 if (m->fs.sp_offset != UNITS_PER_WORD)
10955 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10956 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10960 ix86_add_queued_cfa_restore_notes (get_last_insn ());
10962 /* Sibcall epilogues don't want a return instruction. */
10963 if (style == 0)
10965 m->fs = frame_state_save;
10966 return;
10969 /* Emit vzeroupper if needed. */
10970 ix86_maybe_emit_epilogue_vzeroupper ();
10972 if (crtl->args.pops_args && crtl->args.size)
10974 rtx popc = GEN_INT (crtl->args.pops_args);
10976 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10977 address, do explicit add, and jump indirectly to the caller. */
10979 if (crtl->args.pops_args >= 65536)
10981 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10984 /* There is no "pascal" calling convention in any 64bit ABI. */
10985 gcc_assert (!TARGET_64BIT);
10987 insn = emit_insn (gen_pop (ecx));
10988 m->fs.cfa_offset -= UNITS_PER_WORD;
10989 m->fs.sp_offset -= UNITS_PER_WORD;
10991 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10992 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10993 add_reg_note (insn, REG_CFA_REGISTER,
10994 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10995 RTX_FRAME_RELATED_P (insn) = 1;
10997 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10998 popc, -1, true);
10999 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
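/* The sequence emitted for this path is roughly

       popl  %ecx         ; return address into %ecx
       addl  $N, %esp     ; pop the >= 64K bytes of arguments
       jmp   *%ecx        ; return to the caller

   matching the comment above about popping more than 64K bytes.  */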
11002 emit_jump_insn (gen_simple_return_pop_internal (popc));
11005 emit_jump_insn (gen_simple_return_internal ());
11007 /* Restore the state back to the state from the prologue,
11008 so that it's correct for the next epilogue. */
11009 m->fs = frame_state_save;
11012 /* Reset from the function's potential modifications. */
11015 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11016 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11018 if (pic_offset_table_rtx)
11019 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11021 /* Mach-O doesn't support labels at the end of objects, so if
11022 it looks like we might want one, insert a NOP. */
11024 rtx insn = get_last_insn ();
11025 rtx deleted_debug_label = NULL_RTX;
11027 while (insn && NOTE_P (insn)
11028 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11030 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11031 notes only, instead set their CODE_LABEL_NUMBER to -1,
11032 otherwise there would be code generation differences
11033 in between -g and -g0. */
11034 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11035 deleted_debug_label = insn;
11036 insn = PREV_INSN (insn);
11038 if (insn
11039 && (LABEL_P (insn)
11040 || (NOTE_P (insn)
11041 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11042 fputs ("\tnop\n", file);
11043 else if (deleted_debug_label)
11044 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11045 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11046 CODE_LABEL_NUMBER (insn) = -1;
11052 /* Return a scratch register to use in the split stack prologue. The
11053 split stack prologue is used for -fsplit-stack. It is the first
11054 instructions in the function, even before the regular prologue.
11055 The scratch register can be any caller-saved register which is not
11056 used for parameters or for the static chain. */
11058 static unsigned int
11059 split_stack_prologue_scratch_regno (void)
11068 is_fastcall = (lookup_attribute ("fastcall",
11069 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11070 != NULL);
11071 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11075 if (DECL_STATIC_CHAIN (cfun->decl))
11077 sorry ("-fsplit-stack does not support fastcall with "
11078 "nested function");
11079 return INVALID_REGNUM;
11083 else if (regparm < 3)
11085 if (!DECL_STATIC_CHAIN (cfun->decl))
11091 sorry ("-fsplit-stack does not support 2 register "
11092 " parameters for a nested function");
11093 return INVALID_REGNUM;
11100 /* FIXME: We could make this work by pushing a register
11101 around the addition and comparison. */
11102 sorry ("-fsplit-stack does not support 3 register parameters");
11103 return INVALID_REGNUM;
11108 /* A SYMBOL_REF for the function which allocates new stack space for
11109 -fsplit-stack. */
11111 static GTY(()) rtx split_stack_fn;
11113 /* A SYMBOL_REF for the more stack function when using the large
11114 model. */
11116 static GTY(()) rtx split_stack_fn_large;
11118 /* Handle -fsplit-stack. These are the first instructions in the
11119 function, even before the regular prologue. */
11122 ix86_expand_split_stack_prologue (void)
11124 struct ix86_frame frame;
11125 HOST_WIDE_INT allocate;
11126 unsigned HOST_WIDE_INT args_size;
11127 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11128 rtx scratch_reg = NULL_RTX;
11129 rtx varargs_label = NULL_RTX;
11132 gcc_assert (flag_split_stack && reload_completed);
11134 ix86_finalize_stack_realign_flags ();
11135 ix86_compute_frame_layout (&frame);
11136 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11138 /* This is the label we will branch to if we have enough stack
11139 space. We expect the basic block reordering pass to reverse this
11140 branch if optimizing, so that we branch in the unlikely case. */
11141 label = gen_label_rtx ();
11143 /* We need to compare the stack pointer minus the frame size with
11144 the stack boundary in the TCB. The stack boundary always gives
11145 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11146 can compare directly. Otherwise we need to do an addition. */
11148 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11149 UNSPEC_STACK_CHECK);
11150 limit = gen_rtx_CONST (Pmode, limit);
11151 limit = gen_rtx_MEM (Pmode, limit);
11152 if (allocate < SPLIT_STACK_AVAILABLE)
11153 current = stack_pointer_rtx;
11156 unsigned int scratch_regno;
11159 /* We need a scratch register to hold the stack pointer minus
11160 the required frame size. Since this is the very start of the
11161 function, the scratch register can be any caller-saved
11162 register which is not used for parameters. */
11163 offset = GEN_INT (- allocate);
11164 scratch_regno = split_stack_prologue_scratch_regno ();
11165 if (scratch_regno == INVALID_REGNUM)
11167 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11168 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11170 /* We don't use ix86_gen_add3 in this case because it will
11171 want to split to lea, but when not optimizing the insn
11172 will not be split after this point. */
11173 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11174 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11179 emit_move_insn (scratch_reg, offset);
11180 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11181 stack_pointer_rtx));
11183 current = scratch_reg;
11186 ix86_expand_branch (GEU, current, limit, label);
11187 jump_insn = get_last_insn ();
11188 JUMP_LABEL (jump_insn) = label;
11190 /* Mark the jump as very likely to be taken. */
11191 add_reg_note (jump_insn, REG_BR_PROB,
11192 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
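/* REG_BR_PROB_BASE is 10000, so the value recorded here is 9900,
   i.e. a 99% probability that the branch to LABEL is taken.  */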
11194 if (split_stack_fn == NULL_RTX)
11195 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11196 fn = split_stack_fn;
11198 /* Get more stack space. We pass in the desired stack space and the
11199 size of the arguments to copy to the new stack. In 32-bit mode
11200 we push the parameters; __morestack will return on a new stack
11201 anyhow. In 64-bit mode we pass the parameters in r10 and
11203 allocate_rtx = GEN_INT (allocate);
11204 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11205 call_fusage = NULL_RTX;
11210 reg10 = gen_rtx_REG (Pmode, R10_REG);
11211 reg11 = gen_rtx_REG (Pmode, R11_REG);
11213 /* If this function uses a static chain, it will be in %r10.
11214 Preserve it across the call to __morestack. */
11215 if (DECL_STATIC_CHAIN (cfun->decl))
11219 rax = gen_rtx_REG (Pmode, AX_REG);
11220 emit_move_insn (rax, reg10);
11221 use_reg (&call_fusage, rax);
11224 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11226 HOST_WIDE_INT argval;
11228 /* When using the large model we need to load the address
11229 into a register, and we've run out of registers. So we
11230 switch to a different calling convention, and we call a
11231 different function: __morestack_large. We pass the
11232 argument size in the upper 32 bits of r10 and pass the
11233 frame size in the lower 32 bits. */
11234 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11235 gcc_assert ((args_size & 0xffffffff) == args_size);
11237 if (split_stack_fn_large == NULL_RTX)
11238 split_stack_fn_large =
11239 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11241 if (ix86_cmodel == CM_LARGE_PIC)
11245 label = gen_label_rtx ();
11246 emit_label (label);
11247 LABEL_PRESERVE_P (label) = 1;
11248 emit_insn (gen_set_rip_rex64 (reg10, label));
11249 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11250 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11251 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11253 x = gen_rtx_CONST (Pmode, x);
11254 emit_move_insn (reg11, x);
11255 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11256 x = gen_const_mem (Pmode, x);
11257 emit_move_insn (reg11, x);
11260 emit_move_insn (reg11, split_stack_fn_large);
11264 argval = ((args_size << 16) << 16) + allocate;
11265 emit_move_insn (reg10, GEN_INT (argval));
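/* For example, args_size == 0x18 and allocate == 0x4000 give
   argval == 0x0000001800004000: the argument size lands in the upper
   32 bits of %r10 and the frame size in the lower 32 bits, as
   described above.  Shifting by 16 twice rather than by 32 once
   presumably keeps the shift well-defined even if HOST_WIDE_INT is
   only 32 bits wide.  */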
11269 emit_move_insn (reg10, allocate_rtx);
11270 emit_move_insn (reg11, GEN_INT (args_size));
11271 use_reg (&call_fusage, reg11);
11274 use_reg (&call_fusage, reg10);
11278 emit_insn (gen_push (GEN_INT (args_size)));
11279 emit_insn (gen_push (allocate_rtx));
11281 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11282 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11284 add_function_usage_to (call_insn, call_fusage);
11286 /* In order to make call/return prediction work right, we now need
11287 to execute a return instruction. See
11288 libgcc/config/i386/morestack.S for the details on how this works.
11290 For flow purposes gcc must not see this as a return
11291 instruction--we need control flow to continue at the subsequent
11292 label. Therefore, we use an unspec. */
11293 gcc_assert (crtl->args.pops_args < 65536);
11294 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11296 /* If we are in 64-bit mode and this function uses a static chain,
11297 we saved %r10 in %rax before calling __morestack. */
11298 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11299 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11300 gen_rtx_REG (Pmode, AX_REG));
11302 /* If this function calls va_start, we need to store a pointer to
11303 the arguments on the old stack, because they may not have been
11304 all copied to the new stack. At this point the old stack can be
11305 found at the frame pointer value used by __morestack, because
11306 __morestack has set that up before calling back to us. Here we
11307 store that pointer in a scratch register, and in
11308 ix86_expand_prologue we store the scratch register in a stack
11310 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11312 unsigned int scratch_regno;
11316 scratch_regno = split_stack_prologue_scratch_regno ();
11317 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11318 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11320 /* 64-bit:
11321 fp -> old fp value
11322 return address within this function
11323 return address of caller of this function
11324 stack arguments
11325 So we add three words to get to the stack arguments.
11327 32-bit:
11328 fp -> old fp value
11329 return address within this function
11330 first argument to __morestack
11331 second argument to __morestack
11332 return address of caller of this function
11333 stack arguments
11334 So we add five words to get to the stack arguments. */
11336 words = TARGET_64BIT ? 3 : 5;
11337 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11338 gen_rtx_PLUS (Pmode, frame_reg,
11339 GEN_INT (words * UNITS_PER_WORD))));
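/* On x86_64 this is the equivalent of "leaq 24(%rbp), <scratch>",
   since words == 3 and UNITS_PER_WORD == 8 there.  */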
11341 varargs_label = gen_label_rtx ();
11342 emit_jump_insn (gen_jump (varargs_label));
11343 JUMP_LABEL (get_last_insn ()) = varargs_label;
11348 emit_label (label);
11349 LABEL_NUSES (label) = 1;
11351 /* If this function calls va_start, we now have to set the scratch
11352 register for the case where we do not call __morestack. In this
11353 case we need to set it based on the stack pointer. */
11354 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11356 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11357 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11358 GEN_INT (UNITS_PER_WORD))));
11360 emit_label (varargs_label);
11361 LABEL_NUSES (varargs_label) = 1;
11365 /* We may have to tell the dataflow pass that the split stack prologue
11366 is initializing a scratch register. */
11369 ix86_live_on_entry (bitmap regs)
11371 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11373 gcc_assert (flag_split_stack);
11374 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11378 /* Determine if OP is a suitable SUBREG RTX for an address. */
11381 ix86_address_subreg_operand (rtx op)
11383 enum machine_mode mode;
11388 mode = GET_MODE (op);
11390 if (GET_MODE_CLASS (mode) != MODE_INT)
11393 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11394 failures when the register is one word out of a two word structure. */
11395 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11398 /* simplify_subreg does not handle the stack pointer. */
11399 if (REGNO (op) == STACK_POINTER_REGNUM)
11402 /* Allow only SUBREGs of non-eliminable hard registers. */
11403 return register_no_elim_operand (op, mode);
11406 /* Extract the parts of an RTL expression that is a valid memory address
11407 for an instruction. Return 0 if the structure of the address is
11408 grossly off. Return -1 if the address contains ASHIFT, so it is not
11409 strictly valid, but still used for computing length of lea instruction. */
11412 ix86_decompose_address (rtx addr, struct ix86_address *out)
11414 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11415 rtx base_reg, index_reg;
11416 HOST_WIDE_INT scale = 1;
11417 rtx scale_rtx = NULL_RTX;
11420 enum ix86_address_seg seg = SEG_DEFAULT;
11422 /* Allow zero-extended SImode addresses;
11423 they will be emitted with the addr32 prefix. */
11424 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11426 if (GET_CODE (addr) == ZERO_EXTEND
11427 && GET_MODE (XEXP (addr, 0)) == SImode)
11429 addr = XEXP (addr, 0);
11430 if (CONST_INT_P (addr))
11433 else if (GET_CODE (addr) == AND
11434 && const_32bit_mask (XEXP (addr, 1), DImode))
11436 addr = XEXP (addr, 0);
11438 /* Adjust SUBREGs. */
11439 if (GET_CODE (addr) == SUBREG
11440 && GET_MODE (SUBREG_REG (addr)) == SImode)
11442 addr = SUBREG_REG (addr);
11443 if (CONST_INT_P (addr))
11446 else if (GET_MODE (addr) == DImode)
11447 addr = gen_rtx_SUBREG (SImode, addr, 0);
11448 else if (GET_MODE (addr) != VOIDmode)
11453 /* Allow SImode subregs of DImode addresses;
11454 they will be emitted with the addr32 prefix. */
11455 if (TARGET_64BIT && GET_MODE (addr) == SImode)
11457 if (GET_CODE (addr) == SUBREG
11458 && GET_MODE (SUBREG_REG (addr)) == DImode)
11460 addr = SUBREG_REG (addr);
11461 if (CONST_INT_P (addr))
11468 else if (GET_CODE (addr) == SUBREG)
11470 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11475 else if (GET_CODE (addr) == PLUS)
11477 rtx addends[4], op;
11485 addends[n++] = XEXP (op, 1);
11488 while (GET_CODE (op) == PLUS);
11493 for (i = n; i >= 0; --i)
11496 switch (GET_CODE (op))
11501 index = XEXP (op, 0);
11502 scale_rtx = XEXP (op, 1);
11508 index = XEXP (op, 0);
11509 tmp = XEXP (op, 1);
11510 if (!CONST_INT_P (tmp))
11512 scale = INTVAL (tmp);
11513 if ((unsigned HOST_WIDE_INT) scale > 3)
11515 scale = 1 << scale;
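/* A shift count of 0..3 corresponds to an index scale of 1, 2, 4
   or 8; e.g. (ashift reg 3) addresses reg*8, which is what the lea
   encoding can express.  */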
11519 if (XINT (op, 1) == UNSPEC_TP
11520 && TARGET_TLS_DIRECT_SEG_REFS
11521 && seg == SEG_DEFAULT)
11522 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11528 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11555 else if (GET_CODE (addr) == MULT)
11557 index = XEXP (addr, 0); /* index*scale */
11558 scale_rtx = XEXP (addr, 1);
11560 else if (GET_CODE (addr) == ASHIFT)
11562 /* We're called for lea too, which implements ashift on occasion. */
11563 index = XEXP (addr, 0);
11564 tmp = XEXP (addr, 1);
11565 if (!CONST_INT_P (tmp))
11567 scale = INTVAL (tmp);
11568 if ((unsigned HOST_WIDE_INT) scale > 3)
11570 scale = 1 << scale;
11573 else if (CONST_INT_P (addr))
11575 if (!x86_64_immediate_operand (addr, VOIDmode))
11578 /* Constant addresses are sign extended to 64bit, so we have to
11579 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11581 && val_signbit_known_set_p (SImode, INTVAL (addr)))
11587 disp = addr; /* displacement */
11593 else if (GET_CODE (index) == SUBREG
11594 && ix86_address_subreg_operand (SUBREG_REG (index)))
11600 /* Extract the integral value of scale. */
11603 if (!CONST_INT_P (scale_rtx))
11605 scale = INTVAL (scale_rtx);
11608 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11609 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11611 /* Avoid useless 0 displacement. */
11612 if (disp == const0_rtx && (base || index))
11615 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11616 if (base_reg && index_reg && scale == 1
11617 && (index_reg == arg_pointer_rtx
11618 || index_reg == frame_pointer_rtx
11619 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11622 tmp = base, base = index, index = tmp;
11623 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11626 /* Special case: %ebp cannot be encoded as a base without a displacement.
11627 Similarly %r13. */
11628 if (!disp
11629 && base_reg
11630 && (base_reg == hard_frame_pointer_rtx
11631 || base_reg == frame_pointer_rtx
11632 || base_reg == arg_pointer_rtx
11633 || (REG_P (base_reg)
11634 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11635 || REGNO (base_reg) == R13_REG))))
11636 disp = const0_rtx;
11638 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11639 Avoid this by transforming to [%esi+0].
11640 Reload calls address legitimization without cfun defined, so we need
11641 to test cfun for being non-NULL. */
11642 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11643 && base_reg && !index_reg && !disp
11644 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11647 /* Special case: encode reg+reg instead of reg*2. */
11648 if (!base && index && scale == 2)
11649 base = index, base_reg = index_reg, scale = 1;
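/* An index without a base must be encoded with a mandatory 32-bit
   displacement (SIB byte with base field 101b), so [reg+reg] is
   strictly shorter than [reg*2+0].  */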
11651 /* Special case: scaling cannot be encoded without base or displacement. */
11652 if (!base && !disp && index && scale != 1)
11656 out->index = index;
11658 out->scale = scale;
11664 /* Return cost of the memory address x.
11665 For i386, it is better to use a complex address than let gcc copy
11666 the address into a reg and make a new pseudo. But not if the address
11667 requires two regs - that would mean more pseudos with longer
11668 lifetimes. */
11670 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11672 struct ix86_address parts;
11674 int ok = ix86_decompose_address (x, &parts);
11678 if (parts.base && GET_CODE (parts.base) == SUBREG)
11679 parts.base = SUBREG_REG (parts.base);
11680 if (parts.index && GET_CODE (parts.index) == SUBREG)
11681 parts.index = SUBREG_REG (parts.index);
11683 /* Attempt to minimize number of registers in the address. */
11685 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11687 && (!REG_P (parts.index)
11688 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11692 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11694 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11695 && parts.base != parts.index)
11698 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11699 since its predecode logic can't detect the length of instructions
11700 and it degenerates to vector decoded. Increase cost of such
11701 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11702 to split such addresses or even refuse such addresses at all.
11704 Following addressing modes are affected:
11705 [base+scale*index]
11706 [scale*index+disp]
11707 [base+index]
11709 The first and last case may be avoidable by explicitly coding the zero in
11710 the memory address, but I don't have an AMD-K6 machine handy to check this
11711 theory. */
11714 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11715 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11716 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11722 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
11723 this is used to form addresses to local data when -fPIC is in
11724 effect. */
11727 darwin_local_data_pic (rtx disp)
11729 return (GET_CODE (disp) == UNSPEC
11730 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11733 /* Determine if a given RTX is a valid constant. We already know this
11734 satisfies CONSTANT_P. */
11737 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11739 switch (GET_CODE (x))
11744 if (GET_CODE (x) == PLUS)
11746 if (!CONST_INT_P (XEXP (x, 1)))
11751 if (TARGET_MACHO && darwin_local_data_pic (x))
11754 /* Only some unspecs are valid as "constants". */
11755 if (GET_CODE (x) == UNSPEC)
11756 switch (XINT (x, 1))
11759 case UNSPEC_GOTOFF:
11760 case UNSPEC_PLTOFF:
11761 return TARGET_64BIT;
11763 case UNSPEC_NTPOFF:
11764 x = XVECEXP (x, 0, 0);
11765 return (GET_CODE (x) == SYMBOL_REF
11766 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11767 case UNSPEC_DTPOFF:
11768 x = XVECEXP (x, 0, 0);
11769 return (GET_CODE (x) == SYMBOL_REF
11770 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11775 /* We must have drilled down to a symbol. */
11776 if (GET_CODE (x) == LABEL_REF)
11778 if (GET_CODE (x) != SYMBOL_REF)
11783 /* TLS symbols are never valid. */
11784 if (SYMBOL_REF_TLS_MODEL (x))
11787 /* DLLIMPORT symbols are never valid. */
11788 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11789 && SYMBOL_REF_DLLIMPORT_P (x))
11793 /* mdynamic-no-pic */
11794 if (MACHO_DYNAMIC_NO_PIC_P)
11795 return machopic_symbol_defined_p (x);
11800 if (GET_MODE (x) == TImode
11801 && x != CONST0_RTX (TImode)
11807 if (!standard_sse_constant_p (x))
11814 /* Otherwise we handle everything else in the move patterns. */
11818 /* Determine if it's legal to put X into the constant pool. This
11819 is not possible for the address of thread-local symbols, which
11820 is checked above. */
11823 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11825 /* We can always put integral constants and vectors in memory. */
11826 switch (GET_CODE (x))
11836 return !ix86_legitimate_constant_p (mode, x);
11840 /* Nonzero if the constant value X is a legitimate general operand
11841 when generating PIC code. It is given that flag_pic is on and
11842 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11845 legitimate_pic_operand_p (rtx x)
11849 switch (GET_CODE (x))
11852 inner = XEXP (x, 0);
11853 if (GET_CODE (inner) == PLUS
11854 && CONST_INT_P (XEXP (inner, 1)))
11855 inner = XEXP (inner, 0);
11857 /* Only some unspecs are valid as "constants". */
11858 if (GET_CODE (inner) == UNSPEC)
11859 switch (XINT (inner, 1))
11862 case UNSPEC_GOTOFF:
11863 case UNSPEC_PLTOFF:
11864 return TARGET_64BIT;
11866 x = XVECEXP (inner, 0, 0);
11867 return (GET_CODE (x) == SYMBOL_REF
11868 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11869 case UNSPEC_MACHOPIC_OFFSET:
11870 return legitimate_pic_address_disp_p (x);
11878 return legitimate_pic_address_disp_p (x);
11885 /* Determine if a given CONST RTX is a valid memory displacement
11889 legitimate_pic_address_disp_p (rtx disp)
11893 /* In 64bit mode we can allow direct addresses of symbols and labels
11894 when they are not dynamic symbols. */
11897 rtx op0 = disp, op1;
11899 switch (GET_CODE (disp))
11905 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11907 op0 = XEXP (XEXP (disp, 0), 0);
11908 op1 = XEXP (XEXP (disp, 0), 1);
11909 if (!CONST_INT_P (op1)
11910 || INTVAL (op1) >= 16*1024*1024
11911 || INTVAL (op1) < -16*1024*1024)
11913 if (GET_CODE (op0) == LABEL_REF)
11915 if (GET_CODE (op0) == CONST
11916 && GET_CODE (XEXP (op0, 0)) == UNSPEC
11917 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
11919 if (GET_CODE (op0) == UNSPEC
11920 && XINT (op0, 1) == UNSPEC_PCREL)
11922 if (GET_CODE (op0) != SYMBOL_REF)
11927 /* TLS references should always be enclosed in UNSPEC. */
11928 if (SYMBOL_REF_TLS_MODEL (op0))
11930 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11931 && ix86_cmodel != CM_LARGE_PIC)
11939 if (GET_CODE (disp) != CONST)
11941 disp = XEXP (disp, 0);
11945 /* It is unsafe to allow PLUS expressions; this limits the allowed
11946 distance of GOT table references. We should not need these anyway. */
11947 if (GET_CODE (disp) != UNSPEC
11948 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11949 && XINT (disp, 1) != UNSPEC_GOTOFF
11950 && XINT (disp, 1) != UNSPEC_PCREL
11951 && XINT (disp, 1) != UNSPEC_PLTOFF))
11954 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11955 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11961 if (GET_CODE (disp) == PLUS)
11963 if (!CONST_INT_P (XEXP (disp, 1)))
11965 disp = XEXP (disp, 0);
11969 if (TARGET_MACHO && darwin_local_data_pic (disp))
11972 if (GET_CODE (disp) != UNSPEC)
11975 switch (XINT (disp, 1))
11980 /* We need to check for both symbols and labels because VxWorks loads
11981 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11983 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11984 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11985 case UNSPEC_GOTOFF:
11986 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11987 While the ABI also specifies a 32bit relocation, we don't produce
11988 it in the small PIC model at all. */
11989 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11990 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11992 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11994 case UNSPEC_GOTTPOFF:
11995 case UNSPEC_GOTNTPOFF:
11996 case UNSPEC_INDNTPOFF:
11999 disp = XVECEXP (disp, 0, 0);
12000 return (GET_CODE (disp) == SYMBOL_REF
12001 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12002 case UNSPEC_NTPOFF:
12003 disp = XVECEXP (disp, 0, 0);
12004 return (GET_CODE (disp) == SYMBOL_REF
12005 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12006 case UNSPEC_DTPOFF:
12007 disp = XVECEXP (disp, 0, 0);
12008 return (GET_CODE (disp) == SYMBOL_REF
12009 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12015 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12016 replace the input X, or the original X if no replacement is called for.
12017 The output parameter *WIN is 1 if the calling macro should goto WIN,
12018 0 if it should not. */
12021 ix86_legitimize_reload_address (rtx x,
12022 enum machine_mode mode ATTRIBUTE_UNUSED,
12023 int opnum, int type,
12024 int ind_levels ATTRIBUTE_UNUSED)
12026 /* Reload can generate:
12028 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12032 This RTX is rejected from ix86_legitimate_address_p due to
12033 non-strictness of base register 97. Following this rejection,
12034 reload pushes all three components into separate registers,
12035 creating invalid memory address RTX.
12037 Following code reloads only the invalid part of the
12038 memory address RTX. */
12040 if (GET_CODE (x) == PLUS
12041 && REG_P (XEXP (x, 1))
12042 && GET_CODE (XEXP (x, 0)) == PLUS
12043 && REG_P (XEXP (XEXP (x, 0), 1)))
12046 bool something_reloaded = false;
12048 base = XEXP (XEXP (x, 0), 1);
12049 if (!REG_OK_FOR_BASE_STRICT_P (base))
12051 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12052 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12053 opnum, (enum reload_type)type);
12054 something_reloaded = true;
12057 index = XEXP (x, 1);
12058 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12060 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12061 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12062 opnum, (enum reload_type)type);
12063 something_reloaded = true;
12066 gcc_assert (something_reloaded);
12073 /* Recognizes RTL expressions that are valid memory addresses for an
12074 instruction. The MODE argument is the machine mode for the MEM
12075 expression that wants to use this address.
12077 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12078 convert common non-canonical forms to canonical form so that they will
12079 be recognized. */
12082 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12083 rtx addr, bool strict)
12085 struct ix86_address parts;
12086 rtx base, index, disp;
12087 HOST_WIDE_INT scale;
12089 if (ix86_decompose_address (addr, &parts) <= 0)
12090 /* Decomposition failed. */
12094 index = parts.index;
12096 scale = parts.scale;
12098 /* Validate base register. */
12105 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
12106 reg = SUBREG_REG (base);
12108 /* Base is not a register. */
12111 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
12114 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12115 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12116 /* Base is not valid. */
12120 /* Validate index register. */
12127 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
12128 reg = SUBREG_REG (index);
12130 /* Index is not a register. */
12133 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
12136 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12137 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12138 /* Index is not valid. */
12142 /* Index and base should have the same mode. */
12144 && GET_MODE (base) != GET_MODE (index))
12147 /* Validate scale factor. */
12151 /* Scale without index. */
12154 if (scale != 2 && scale != 4 && scale != 8)
12155 /* Scale is not a valid multiplier. */
12159 /* Validate displacement. */
12162 if (GET_CODE (disp) == CONST
12163 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12164 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12165 switch (XINT (XEXP (disp, 0), 1))
12167 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12168 used. While the ABI also specifies 32bit relocations, we don't
12169 produce them at all and use IP relative addressing instead. */
12171 case UNSPEC_GOTOFF:
12172 gcc_assert (flag_pic);
12174 goto is_legitimate_pic;
12176 /* 64bit address unspec. */
12179 case UNSPEC_GOTPCREL:
12181 gcc_assert (flag_pic);
12182 goto is_legitimate_pic;
12184 case UNSPEC_GOTTPOFF:
12185 case UNSPEC_GOTNTPOFF:
12186 case UNSPEC_INDNTPOFF:
12187 case UNSPEC_NTPOFF:
12188 case UNSPEC_DTPOFF:
12191 case UNSPEC_STACK_CHECK:
12192 gcc_assert (flag_split_stack);
12196 /* Invalid address unspec. */
12200 else if (SYMBOLIC_CONST (disp)
12204 && MACHOPIC_INDIRECT
12205 && !machopic_operand_p (disp)
12211 if (TARGET_64BIT && (index || base))
12213 /* foo@dtpoff(%rX) is ok. */
12214 if (GET_CODE (disp) != CONST
12215 || GET_CODE (XEXP (disp, 0)) != PLUS
12216 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12217 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12218 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12219 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12220 /* Non-constant pic memory reference. */
12223 else if ((!TARGET_MACHO || flag_pic)
12224 && ! legitimate_pic_address_disp_p (disp))
12225 /* Displacement is an invalid pic construct. */
12228 else if (MACHO_DYNAMIC_NO_PIC_P
12229 && !ix86_legitimate_constant_p (Pmode, disp))
12230 /* Displacement must be referenced via non_lazy_pointer. */
12234 /* This code used to verify that a symbolic pic displacement
12235 includes the pic_offset_table_rtx register.
12237 While this is a good idea, unfortunately these constructs may
12238 be created by "adds using lea" optimization for incorrect
12239 code like:
12241 int a;
12242 int foo(int i)
12243 {
12244 return *(&a+i);
12245 }
12247 This code is nonsensical, but results in addressing
12248 GOT table with pic_offset_table_rtx base. We can't
12249 just refuse it easily, since it gets matched by
12250 "addsi3" pattern, that later gets split to lea in the
12251 case output register differs from input. While this
12252 can be handled by separate addsi pattern for this case
12253 that never results in lea, this seems to be easier and
12254 correct fix for crash to disable this test. */
12256 else if (GET_CODE (disp) != LABEL_REF
12257 && !CONST_INT_P (disp)
12258 && (GET_CODE (disp) != CONST
12259 || !ix86_legitimate_constant_p (Pmode, disp))
12260 && (GET_CODE (disp) != SYMBOL_REF
12261 || !ix86_legitimate_constant_p (Pmode, disp)))
12262 /* Displacement is not constant. */
12264 else if (TARGET_64BIT
12265 && !x86_64_immediate_operand (disp, VOIDmode))
12266 /* Displacement is out of range. */
12270 /* Everything looks valid. */
12274 /* Determine if a given RTX is a valid constant address. */
12277 constant_address_p (rtx x)
12279 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12282 /* Return a unique alias set for the GOT. */
12284 static alias_set_type
12285 ix86_GOT_alias_set (void)
12287 static alias_set_type set = -1;
12289 set = new_alias_set ();
12293 /* Return a legitimate reference for ORIG (an address) using the
12294 register REG. If REG is 0, a new pseudo is generated.
12296 There are two types of references that must be handled:
12298 1. Global data references must load the address from the GOT, via
12299 the PIC reg. An insn is emitted to do this load, and the reg is returned.
12302 2. Static data references, constant pool addresses, and code labels
12303 compute the address as an offset from the GOT, whose base is in
12304 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12305 differentiate them from global data objects. The returned
12306 address is the PIC reg + an unspec constant.
12308 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12309 reg also appears in the address. */
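/* A hedged illustration (register choices are assumptions; only the
   relocation forms matter): on 32-bit ELF, case 1 typically becomes a
   load through the GOT,

       movl  sym@GOT(%ebx), %eax      # %eax = &sym

   while case 2 reaches a local symbol with a displacement from the
   PIC base alone,

       leal  sym@GOTOFF(%ebx), %eax   # %eax = &sym  */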
12312 legitimize_pic_address (rtx orig, rtx reg)
12315 rtx new_rtx = orig;
12319 if (TARGET_MACHO && !TARGET_64BIT)
12322 reg = gen_reg_rtx (Pmode);
12323 /* Use the generic Mach-O PIC machinery. */
12324 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12328 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12330 else if (TARGET_64BIT
12331 && ix86_cmodel != CM_SMALL_PIC
12332 && gotoff_operand (addr, Pmode))
12335 /* This symbol may be referenced via a displacement from the PIC
12336 base address (@GOTOFF). */
12338 if (reload_in_progress)
12339 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12340 if (GET_CODE (addr) == CONST)
12341 addr = XEXP (addr, 0);
12342 if (GET_CODE (addr) == PLUS)
12344 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12346 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12349 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12350 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12352 tmpreg = gen_reg_rtx (Pmode);
12355 emit_move_insn (tmpreg, new_rtx);
12359 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12360 tmpreg, 1, OPTAB_DIRECT);
12363 else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12365 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12367 /* This symbol may be referenced via a displacement from the PIC
12368 base address (@GOTOFF). */
12370 if (reload_in_progress)
12371 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12372 if (GET_CODE (addr) == CONST)
12373 addr = XEXP (addr, 0);
12374 if (GET_CODE (addr) == PLUS)
12376 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12378 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12381 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12382 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12383 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12387 emit_move_insn (reg, new_rtx);
12391 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12392 /* We can't use @GOTOFF for text labels on VxWorks;
12393 see gotoff_operand. */
12394 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12396 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12398 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12399 return legitimize_dllimport_symbol (addr, true);
12400 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12401 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12402 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12404 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12405 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12409 /* For x64 PE-COFF there is no GOT table. So we use the address directly. */
12411 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12413 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12414 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12417 reg = gen_reg_rtx (Pmode);
12418 emit_move_insn (reg, new_rtx);
12421 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12423 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12424 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12425 new_rtx = gen_const_mem (Pmode, new_rtx);
12426 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12429 reg = gen_reg_rtx (Pmode);
12430 /* Use gen_movsi directly; otherwise the address is loaded
12431 into a register for CSE. We don't want to CSE these addresses;
12432 instead we CSE addresses from the GOT table, so skip this. */
12433 emit_insn (gen_movsi (reg, new_rtx));
12438 /* This symbol must be referenced via a load from the
12439 Global Offset Table (@GOT). */
12441 if (reload_in_progress)
12442 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12443 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12444 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12446 new_rtx = force_reg (Pmode, new_rtx);
12447 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12448 new_rtx = gen_const_mem (Pmode, new_rtx);
12449 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12452 reg = gen_reg_rtx (Pmode);
12453 emit_move_insn (reg, new_rtx);
12459 if (CONST_INT_P (addr)
12460 && !x86_64_immediate_operand (addr, VOIDmode))
12464 emit_move_insn (reg, addr);
12468 new_rtx = force_reg (Pmode, addr);
12470 else if (GET_CODE (addr) == CONST)
12472 addr = XEXP (addr, 0);
12474 /* We must match stuff we generate before. Assume the only
12475 unspecs that can get here are ours. Not that we could do
12476 anything with them anyway.... */
12477 if (GET_CODE (addr) == UNSPEC
12478 || (GET_CODE (addr) == PLUS
12479 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12481 gcc_assert (GET_CODE (addr) == PLUS);
12483 if (GET_CODE (addr) == PLUS)
12485 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12487 /* Check first to see if this is a constant offset from a @GOTOFF
12488 symbol reference. */
12489 if (gotoff_operand (op0, Pmode)
12490 && CONST_INT_P (op1))
12494 if (reload_in_progress)
12495 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12496 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12498 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12499 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12500 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12504 emit_move_insn (reg, new_rtx);
12510 if (INTVAL (op1) < -16*1024*1024
12511 || INTVAL (op1) >= 16*1024*1024)
12513 if (!x86_64_immediate_operand (op1, Pmode))
12514 op1 = force_reg (Pmode, op1);
12515 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12521 base = legitimize_pic_address (XEXP (addr, 0), reg);
12522 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12523 base == reg ? NULL_RTX : reg);
12525 if (CONST_INT_P (new_rtx))
12526 new_rtx = plus_constant (base, INTVAL (new_rtx));
12529 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12531 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12532 new_rtx = XEXP (new_rtx, 1);
12534 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12542 /* Load the thread pointer. If TO_REG is true, force it into a register. */
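/* Illustration (assumed Linux conventions): the UNSPEC_TP access built
   below stands for a read of the thread pointer through the TLS
   segment, e.g. "movl %gs:0, %eax" on 32-bit and "movq %fs:0, %rax"
   on 64-bit targets.  */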
12545 get_thread_pointer (bool to_reg)
12547 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12549 if (GET_MODE (tp) != Pmode)
12550 tp = convert_to_mode (Pmode, tp, 1);
12553 tp = copy_addr_to_reg (tp);
12558 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12560 static GTY(()) rtx ix86_tls_symbol;
12563 ix86_tls_get_addr (void)
12565 if (!ix86_tls_symbol)
12568 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12569 ? "___tls_get_addr" : "__tls_get_addr");
12571 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12574 return ix86_tls_symbol;
12577 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12579 static GTY(()) rtx ix86_tls_module_base_symbol;
12582 ix86_tls_module_base (void)
12584 if (!ix86_tls_module_base_symbol)
12586 ix86_tls_module_base_symbol
12587 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12589 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12590 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12593 return ix86_tls_module_base_symbol;
12596 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12597 false if we expect this to be used for a memory address and true if
12598 we expect to load the address into a register. */
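/* Overview of the expansions below (a summary of this function, not
   new behavior): global- and local-dynamic call __tls_get_addr (or
   use the GNU2 TLS-descriptor insns), local-dynamic then adding a
   @DTPOFF offset to the module base; initial-exec loads the offset
   from the GOT via @GOTTPOFF/@GOTNTPOFF/@INDNTPOFF; local-exec folds
   an @NTPOFF/@TPOFF constant directly into a thread-pointer-relative
   address.  */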
12601 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12603 rtx dest, base, off;
12604 rtx pic = NULL_RTX, tp = NULL_RTX;
12609 case TLS_MODEL_GLOBAL_DYNAMIC:
12610 dest = gen_reg_rtx (Pmode);
12615 pic = pic_offset_table_rtx;
12618 pic = gen_reg_rtx (Pmode);
12619 emit_insn (gen_set_got (pic));
12623 if (TARGET_GNU2_TLS)
12626 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12628 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12630 tp = get_thread_pointer (true);
12631 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12633 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12637 rtx caddr = ix86_tls_get_addr ();
12641 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12644 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12645 insns = get_insns ();
12648 RTL_CONST_CALL_P (insns) = 1;
12649 emit_libcall_block (insns, dest, rax, x);
12652 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12656 case TLS_MODEL_LOCAL_DYNAMIC:
12657 base = gen_reg_rtx (Pmode);
12662 pic = pic_offset_table_rtx;
12665 pic = gen_reg_rtx (Pmode);
12666 emit_insn (gen_set_got (pic));
12670 if (TARGET_GNU2_TLS)
12672 rtx tmp = ix86_tls_module_base ();
12675 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12677 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12679 tp = get_thread_pointer (true);
12680 set_unique_reg_note (get_last_insn (), REG_EQUAL,
12681 gen_rtx_MINUS (Pmode, tmp, tp));
12685 rtx caddr = ix86_tls_get_addr ();
12689 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12692 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12693 insns = get_insns ();
12696 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12697 share the LD_BASE result with other LD model accesses. */
12698 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12699 UNSPEC_TLS_LD_BASE);
12701 RTL_CONST_CALL_P (insns) = 1;
12702 emit_libcall_block (insns, base, rax, eqv);
12705 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12708 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12709 off = gen_rtx_CONST (Pmode, off);
12711 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12713 if (TARGET_GNU2_TLS)
12715 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12717 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12721 case TLS_MODEL_INITIAL_EXEC:
12724 if (TARGET_SUN_TLS)
12726 /* The Sun linker took the AMD64 TLS spec literally
12727 and can only handle %rax as the destination of the
12728 initial executable code sequence. */
12730 dest = gen_reg_rtx (Pmode);
12731 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12736 type = UNSPEC_GOTNTPOFF;
12740 if (reload_in_progress)
12741 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12742 pic = pic_offset_table_rtx;
12743 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12745 else if (!TARGET_ANY_GNU_TLS)
12747 pic = gen_reg_rtx (Pmode);
12748 emit_insn (gen_set_got (pic));
12749 type = UNSPEC_GOTTPOFF;
12754 type = UNSPEC_INDNTPOFF;
12757 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12758 off = gen_rtx_CONST (Pmode, off);
12760 off = gen_rtx_PLUS (Pmode, pic, off);
12761 off = gen_const_mem (Pmode, off);
12762 set_mem_alias_set (off, ix86_GOT_alias_set ());
12764 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12766 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12767 off = force_reg (Pmode, off);
12768 return gen_rtx_PLUS (Pmode, base, off);
12772 base = get_thread_pointer (true);
12773 dest = gen_reg_rtx (Pmode);
12774 emit_insn (gen_subsi3 (dest, base, off));
12778 case TLS_MODEL_LOCAL_EXEC:
12779 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12780 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12781 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12782 off = gen_rtx_CONST (Pmode, off);
12784 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12786 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12787 return gen_rtx_PLUS (Pmode, base, off);
12791 base = get_thread_pointer (true);
12792 dest = gen_reg_rtx (Pmode);
12793 emit_insn (gen_subsi3 (dest, base, off));
12798 gcc_unreachable ();
12804 /* Create or return the unique __imp_DECL dllimport symbol corresponding to DECL. */
12807 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12808 htab_t dllimport_map;
12811 get_dllimport_decl (tree decl)
12813 struct tree_map *h, in;
12816 const char *prefix;
12817 size_t namelen, prefixlen;
12822 if (!dllimport_map)
12823 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12825 in.hash = htab_hash_pointer (decl);
12826 in.base.from = decl;
12827 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12828 h = (struct tree_map *) *loc;
12832 *loc = h = ggc_alloc_tree_map ();
12834 h->base.from = decl;
12835 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12836 VAR_DECL, NULL, ptr_type_node);
12837 DECL_ARTIFICIAL (to) = 1;
12838 DECL_IGNORED_P (to) = 1;
12839 DECL_EXTERNAL (to) = 1;
12840 TREE_READONLY (to) = 1;
12842 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12843 name = targetm.strip_name_encoding (name);
12844 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12845 ? "*__imp_" : "*__imp__";
12846 namelen = strlen (name);
12847 prefixlen = strlen (prefix);
12848 imp_name = (char *) alloca (namelen + prefixlen + 1);
12849 memcpy (imp_name, prefix, prefixlen);
12850 memcpy (imp_name + prefixlen, name, namelen + 1);
12852 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12853 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12854 SET_SYMBOL_REF_DECL (rtl, to);
12855 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12857 rtl = gen_const_mem (Pmode, rtl);
12858 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12860 SET_DECL_RTL (to, rtl);
12861 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12866 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12867 true if we require the result be a register. */
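/* For illustration (assumed assembler output, not from the sources):
   a dllimport reference to foo is redirected through the import-table
   pointer built by get_dllimport_decl, e.g.

       movl  __imp__foo, %eax       # %eax = &foo

   so the real address is fetched from the import table at run time.  */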
12870 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12875 gcc_assert (SYMBOL_REF_DECL (symbol));
12876 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12878 x = DECL_RTL (imp_decl);
12880 x = force_reg (Pmode, x);
12884 /* Try machine-dependent ways of modifying an illegitimate address
12885 to be legitimate. If we find one, return the new, valid address.
12886 This macro is used in only one place: `memory_address' in explow.c.
12888 OLDX is the address as it was before break_out_memory_refs was called.
12889 In some cases it is useful to look at this to decide what needs to be done.
12891 It is always safe for this macro to do nothing. It exists to recognize
12892 opportunities to optimize the output.
12894 For the 80386, we handle X+REG by loading X into a register R and
12895 using R+REG. R will go in a general reg and indexing will be used.
12896 However, if REG is a broken-out memory address or multiplication,
12897 nothing needs to be done because REG can certainly go in a general reg.
12899 When -fpic is used, special handling is needed for symbolic references.
12900 See comments by legitimize_pic_address in i386.c for details. */
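/* A minimal example (assumed registers): for an address like sym+%eax
   the code below loads the symbolic part into a fresh register and
   forms a reg+reg address,

       movl  $sym, %ecx
       ...   (%ecx,%eax) ...

   which matches the X+REG handling described above.  */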
12903 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12904 enum machine_mode mode)
12909 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12911 return legitimize_tls_address (x, (enum tls_model) log, false);
12912 if (GET_CODE (x) == CONST
12913 && GET_CODE (XEXP (x, 0)) == PLUS
12914 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12915 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12917 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12918 (enum tls_model) log, false);
12919 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12922 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12924 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12925 return legitimize_dllimport_symbol (x, true);
12926 if (GET_CODE (x) == CONST
12927 && GET_CODE (XEXP (x, 0)) == PLUS
12928 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12929 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12931 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12932 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12936 if (flag_pic && SYMBOLIC_CONST (x))
12937 return legitimize_pic_address (x, 0);
12940 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12941 return machopic_indirect_data_reference (x, 0);
12944 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12945 if (GET_CODE (x) == ASHIFT
12946 && CONST_INT_P (XEXP (x, 1))
12947 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12950 log = INTVAL (XEXP (x, 1));
12951 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12952 GEN_INT (1 << log));
12955 if (GET_CODE (x) == PLUS)
12957 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12959 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12960 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12961 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12964 log = INTVAL (XEXP (XEXP (x, 0), 1));
12965 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12966 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12967 GEN_INT (1 << log));
12970 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12971 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12972 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12975 log = INTVAL (XEXP (XEXP (x, 1), 1));
12976 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12977 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12978 GEN_INT (1 << log));
12981 /* Put multiply first if it isn't already. */
12982 if (GET_CODE (XEXP (x, 1)) == MULT)
12984 rtx tmp = XEXP (x, 0);
12985 XEXP (x, 0) = XEXP (x, 1);
12990 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12991 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12992 created by virtual register instantiation, register elimination, and
12993 similar optimizations. */
12994 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12997 x = gen_rtx_PLUS (Pmode,
12998 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12999 XEXP (XEXP (x, 1), 0)),
13000 XEXP (XEXP (x, 1), 1));
13004 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13005 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13006 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13007 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13008 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13009 && CONSTANT_P (XEXP (x, 1)))
13012 rtx other = NULL_RTX;
13014 if (CONST_INT_P (XEXP (x, 1)))
13016 constant = XEXP (x, 1);
13017 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13019 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13021 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13022 other = XEXP (x, 1);
13030 x = gen_rtx_PLUS (Pmode,
13031 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13032 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13033 plus_constant (other, INTVAL (constant)));
13037 if (changed && ix86_legitimate_address_p (mode, x, false))
13040 if (GET_CODE (XEXP (x, 0)) == MULT)
13043 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13046 if (GET_CODE (XEXP (x, 1)) == MULT)
13049 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13053 && REG_P (XEXP (x, 1))
13054 && REG_P (XEXP (x, 0)))
13057 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13060 x = legitimize_pic_address (x, 0);
13063 if (changed && ix86_legitimate_address_p (mode, x, false))
13066 if (REG_P (XEXP (x, 0)))
13068 rtx temp = gen_reg_rtx (Pmode);
13069 rtx val = force_operand (XEXP (x, 1), temp);
13072 if (GET_MODE (val) != Pmode)
13073 val = convert_to_mode (Pmode, val, 1);
13074 emit_move_insn (temp, val);
13077 XEXP (x, 1) = temp;
13081 else if (REG_P (XEXP (x, 1)))
13083 rtx temp = gen_reg_rtx (Pmode);
13084 rtx val = force_operand (XEXP (x, 0), temp);
13087 if (GET_MODE (val) != Pmode)
13088 val = convert_to_mode (Pmode, val, 1);
13089 emit_move_insn (temp, val);
13092 XEXP (x, 0) = temp;
13100 /* Print an integer constant expression in assembler syntax. Addition
13101 and subtraction are the only arithmetic that may appear in these
13102 expressions. FILE is the stdio stream to write to, X is the rtx, and
13103 CODE is the operand print code from the output string. */
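/* Example of the output (illustrative): the UNSPEC cases below print
   relocation suffixes, so (unspec [sym] UNSPEC_GOTOFF) comes out as
   "sym@GOTOFF" and (unspec [sym] UNSPEC_GOTPCREL) as
   "sym@GOTPCREL(%rip)" in AT&T syntax.  */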
13106 output_pic_addr_const (FILE *file, rtx x, int code)
13110 switch (GET_CODE (x))
13113 gcc_assert (flag_pic);
13118 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13119 output_addr_const (file, x);
13122 const char *name = XSTR (x, 0);
13124 /* Mark the decl as referenced so that cgraph will
13125 output the function. */
13126 if (SYMBOL_REF_DECL (x))
13127 mark_decl_referenced (SYMBOL_REF_DECL (x));
13130 if (MACHOPIC_INDIRECT
13131 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13132 name = machopic_indirection_name (x, /*stub_p=*/true);
13134 assemble_name (file, name);
13136 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13137 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13138 fputs ("@PLT", file);
13145 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13146 assemble_name (asm_out_file, buf);
13150 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13154 /* This used to output parentheses around the expression,
13155 but that does not work on the 386 (either ATT or BSD assembler). */
13156 output_pic_addr_const (file, XEXP (x, 0), code);
13160 if (GET_MODE (x) == VOIDmode)
13162 /* We can use %d if the number is <32 bits and positive. */
13163 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13164 fprintf (file, "0x%lx%08lx",
13165 (unsigned long) CONST_DOUBLE_HIGH (x),
13166 (unsigned long) CONST_DOUBLE_LOW (x));
13168 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13171 /* We can't handle floating point constants;
13172 TARGET_PRINT_OPERAND must handle them. */
13173 output_operand_lossage ("floating constant misused");
13177 /* Some assemblers need integer constants to appear first. */
13178 if (CONST_INT_P (XEXP (x, 0)))
13180 output_pic_addr_const (file, XEXP (x, 0), code);
13182 output_pic_addr_const (file, XEXP (x, 1), code);
13186 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13187 output_pic_addr_const (file, XEXP (x, 1), code);
13189 output_pic_addr_const (file, XEXP (x, 0), code);
13195 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13196 output_pic_addr_const (file, XEXP (x, 0), code);
13198 output_pic_addr_const (file, XEXP (x, 1), code);
13200 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13204 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13206 bool f = i386_asm_output_addr_const_extra (file, x);
13211 gcc_assert (XVECLEN (x, 0) == 1);
13212 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13213 switch (XINT (x, 1))
13216 fputs ("@GOT", file);
13218 case UNSPEC_GOTOFF:
13219 fputs ("@GOTOFF", file);
13221 case UNSPEC_PLTOFF:
13222 fputs ("@PLTOFF", file);
13225 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13226 "(%rip)" : "[rip]", file);
13228 case UNSPEC_GOTPCREL:
13229 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13230 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13232 case UNSPEC_GOTTPOFF:
13233 /* FIXME: This might be @TPOFF in Sun ld too. */
13234 fputs ("@gottpoff", file);
13237 fputs ("@tpoff", file);
13239 case UNSPEC_NTPOFF:
13241 fputs ("@tpoff", file);
13243 fputs ("@ntpoff", file);
13245 case UNSPEC_DTPOFF:
13246 fputs ("@dtpoff", file);
13248 case UNSPEC_GOTNTPOFF:
13250 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13251 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13253 fputs ("@gotntpoff", file);
13255 case UNSPEC_INDNTPOFF:
13256 fputs ("@indntpoff", file);
13259 case UNSPEC_MACHOPIC_OFFSET:
13261 machopic_output_function_base_name (file);
13265 output_operand_lossage ("invalid UNSPEC as operand");
13271 output_operand_lossage ("invalid expression as operand");
13275 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13276 We need to emit DTP-relative relocations. */
13278 static void ATTRIBUTE_UNUSED
13279 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13281 fputs (ASM_LONG, file);
13282 output_addr_const (file, x);
13283 fputs ("@dtpoff", file);
13289 fputs (", 0", file);
13292 gcc_unreachable ();
13296 /* Return true if X is a representation of the PIC register. This copes
13297 with calls from ix86_find_base_term, where the register might have
13298 been replaced by a cselib value. */
13301 ix86_pic_register_p (rtx x)
13303 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13304 return (pic_offset_table_rtx
13305 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13307 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13310 /* Helper function for ix86_delegitimize_address.
13311 Attempt to delegitimize TLS local-exec accesses. */
13314 ix86_delegitimize_tls_address (rtx orig_x)
13316 rtx x = orig_x, unspec;
13317 struct ix86_address addr;
13319 if (!TARGET_TLS_DIRECT_SEG_REFS)
13323 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13325 if (ix86_decompose_address (x, &addr) == 0
13326 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13327 || addr.disp == NULL_RTX
13328 || GET_CODE (addr.disp) != CONST)
13330 unspec = XEXP (addr.disp, 0);
13331 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13332 unspec = XEXP (unspec, 0);
13333 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13335 x = XVECEXP (unspec, 0, 0);
13336 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13337 if (unspec != XEXP (addr.disp, 0))
13338 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13341 rtx idx = addr.index;
13342 if (addr.scale != 1)
13343 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13344 x = gen_rtx_PLUS (Pmode, idx, x);
13347 x = gen_rtx_PLUS (Pmode, addr.base, x);
13348 if (MEM_P (orig_x))
13349 x = replace_equiv_address_nv (orig_x, x);
13353 /* In the name of slightly smaller debug output, and to cater to
13354 general assembler lossage, recognize PIC+GOTOFF and turn it back
13355 into a direct symbol reference.
13357 On Darwin, this is necessary to avoid a crash, because Darwin
13358 has a different PIC label for each routine but the DWARF debugging
13359 information is not associated with any particular routine, so it's
13360 necessary to remove references to the PIC label from RTL stored by
13361 the DWARF output code. */
13364 ix86_delegitimize_address (rtx x)
13366 rtx orig_x = delegitimize_mem_from_attrs (x);
13367 /* addend is NULL or some rtx if x is something+GOTOFF where
13368 something doesn't include the PIC register. */
13369 rtx addend = NULL_RTX;
13370 /* reg_addend is NULL or a multiple of some register. */
13371 rtx reg_addend = NULL_RTX;
13372 /* const_addend is NULL or a const_int. */
13373 rtx const_addend = NULL_RTX;
13374 /* This is the result, or NULL. */
13375 rtx result = NULL_RTX;
13384 if (GET_CODE (x) == CONST
13385 && GET_CODE (XEXP (x, 0)) == PLUS
13386 && GET_MODE (XEXP (x, 0)) == Pmode
13387 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13388 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
13389 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
13391 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
13392 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
13393 if (MEM_P (orig_x))
13394 x = replace_equiv_address_nv (orig_x, x);
13397 if (GET_CODE (x) != CONST
13398 || GET_CODE (XEXP (x, 0)) != UNSPEC
13399 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13400 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13401 || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
13402 return ix86_delegitimize_tls_address (orig_x);
13403 x = XVECEXP (XEXP (x, 0), 0, 0);
13404 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
13406 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13414 if (GET_CODE (x) != PLUS
13415 || GET_CODE (XEXP (x, 1)) != CONST)
13416 return ix86_delegitimize_tls_address (orig_x);
13418 if (ix86_pic_register_p (XEXP (x, 0)))
13419 /* %ebx + GOT/GOTOFF */
13421 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13423 /* %ebx + %reg * scale + GOT/GOTOFF */
13424 reg_addend = XEXP (x, 0);
13425 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13426 reg_addend = XEXP (reg_addend, 1);
13427 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13428 reg_addend = XEXP (reg_addend, 0);
13431 reg_addend = NULL_RTX;
13432 addend = XEXP (x, 0);
13436 addend = XEXP (x, 0);
13438 x = XEXP (XEXP (x, 1), 0);
13439 if (GET_CODE (x) == PLUS
13440 && CONST_INT_P (XEXP (x, 1)))
13442 const_addend = XEXP (x, 1);
13446 if (GET_CODE (x) == UNSPEC
13447 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13448 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13449 result = XVECEXP (x, 0, 0);
13451 if (TARGET_MACHO && darwin_local_data_pic (x)
13452 && !MEM_P (orig_x))
13453 result = XVECEXP (x, 0, 0);
13456 return ix86_delegitimize_tls_address (orig_x);
13459 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13461 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13464 /* If the rest of original X doesn't involve the PIC register, add
13465 addend and subtract pic_offset_table_rtx. This can happen e.g.
13467 leal (%ebx, %ecx, 4), %ecx
13469 movl foo@GOTOFF(%ecx), %edx
13470 in which case we return (%ecx - %ebx) + foo. */
13471 if (pic_offset_table_rtx)
13472 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13473 pic_offset_table_rtx),
13478 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13480 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13481 if (result == NULL_RTX)
13487 /* If X is a machine specific address (i.e. a symbol or label being
13488 referenced as a displacement from the GOT implemented using an
13489 UNSPEC), then return the base term. Otherwise return X. */
13492 ix86_find_base_term (rtx x)
13498 if (GET_CODE (x) != CONST)
13500 term = XEXP (x, 0);
13501 if (GET_CODE (term) == PLUS
13502 && (CONST_INT_P (XEXP (term, 1))
13503 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13504 term = XEXP (term, 0);
13505 if (GET_CODE (term) != UNSPEC
13506 || (XINT (term, 1) != UNSPEC_GOTPCREL
13507 && XINT (term, 1) != UNSPEC_PCREL))
13510 return XVECEXP (term, 0, 0);
13513 return ix86_delegitimize_address (x);
13517 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13518 int fp, FILE *file)
13520 const char *suffix;
13522 if (mode == CCFPmode || mode == CCFPUmode)
13524 code = ix86_fp_compare_code_to_integer (code);
13528 code = reverse_condition (code);
13579 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13583 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13584 Those same assemblers have the same but opposite lossage on cmov. */
13585 if (mode == CCmode)
13586 suffix = fp ? "nbe" : "a";
13587 else if (mode == CCCmode)
13590 gcc_unreachable ();
13606 gcc_unreachable ();
13610 gcc_assert (mode == CCmode || mode == CCCmode);
13627 gcc_unreachable ();
13631 /* ??? As above. */
13632 gcc_assert (mode == CCmode || mode == CCCmode);
13633 suffix = fp ? "nb" : "ae";
13636 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13640 /* ??? As above. */
13641 if (mode == CCmode)
13643 else if (mode == CCCmode)
13644 suffix = fp ? "nb" : "ae";
13646 gcc_unreachable ();
13649 suffix = fp ? "u" : "p";
13652 suffix = fp ? "nu" : "np";
13655 gcc_unreachable ();
13657 fputs (suffix, file);
13660 /* Print the name of register X to FILE based on its machine mode and number.
13661 If CODE is 'w', pretend the mode is HImode.
13662 If CODE is 'b', pretend the mode is QImode.
13663 If CODE is 'k', pretend the mode is SImode.
13664 If CODE is 'q', pretend the mode is DImode.
13665 If CODE is 'x', pretend the mode is V4SFmode.
13666 If CODE is 't', pretend the mode is V8SFmode.
13667 If CODE is 'h', pretend the reg is the 'high' byte register.
13668 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
13669 If CODE is 'd', duplicate the operand for an AVX instruction.
13673 print_reg (rtx x, int code, FILE *file)
13676 bool duplicated = code == 'd' && TARGET_AVX;
13678 gcc_assert (x == pc_rtx
13679 || (REGNO (x) != ARG_POINTER_REGNUM
13680 && REGNO (x) != FRAME_POINTER_REGNUM
13681 && REGNO (x) != FLAGS_REG
13682 && REGNO (x) != FPSR_REG
13683 && REGNO (x) != FPCR_REG));
13685 if (ASSEMBLER_DIALECT == ASM_ATT)
13690 gcc_assert (TARGET_64BIT);
13691 fputs ("rip", file);
13695 if (code == 'w' || MMX_REG_P (x))
13697 else if (code == 'b')
13699 else if (code == 'k')
13701 else if (code == 'q')
13703 else if (code == 'y')
13705 else if (code == 'h')
13707 else if (code == 'x')
13709 else if (code == 't')
13712 code = GET_MODE_SIZE (GET_MODE (x));
13714 /* Irritatingly, AMD extended registers use a different naming convention
13715 from the normal registers: "r%d[bwd]". */
13716 if (REX_INT_REG_P (x))
13718 gcc_assert (TARGET_64BIT);
13720 fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
13724 error ("extended registers have no high halves");
13739 error ("unsupported operand size for extended register");
13749 if (STACK_TOP_P (x))
13758 if (! ANY_FP_REG_P (x))
13759 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13764 reg = hi_reg_name[REGNO (x)];
13767 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13769 reg = qi_reg_name[REGNO (x)];
13772 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13774 reg = qi_high_reg_name[REGNO (x)];
13779 gcc_assert (!duplicated);
13781 fputs (hi_reg_name[REGNO (x)] + 1, file);
13786 gcc_unreachable ();
13792 if (ASSEMBLER_DIALECT == ASM_ATT)
13793 fprintf (file, ", %%%s", reg);
13795 fprintf (file, ", %s", reg);
13799 /* Locate some local-dynamic symbol still in use by this function
13800 so that we can print its name in some tls_local_dynamic_base
13804 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13808 if (GET_CODE (x) == SYMBOL_REF
13809 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13811 cfun->machine->some_ld_name = XSTR (x, 0);
13818 static const char *
13819 get_some_local_dynamic_name (void)
13823 if (cfun->machine->some_ld_name)
13824 return cfun->machine->some_ld_name;
13826 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13827 if (NONDEBUG_INSN_P (insn)
13828 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13829 return cfun->machine->some_ld_name;
13834 /* Meaning of CODE:
13835 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13836 C -- print opcode suffix for set/cmov insn.
13837 c -- like C, but print reversed condition
13838 F,f -- likewise, but for floating-point.
13839 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13841 R -- print the prefix for register names.
13842 z -- print the opcode suffix for the size of the current operand.
13843 Z -- likewise, with special suffixes for x87 instructions.
13844 * -- print a star (in certain assembler syntax)
13845 A -- print an absolute memory reference.
13846 E -- print address with DImode register names if TARGET_64BIT.
13847 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13848 s -- print a shift double count, followed by the assembler's argument delimiter.
13850 b -- print the QImode name of the register for the indicated operand.
13851 %b0 would print %al if operands[0] is reg 0.
13852 w -- likewise, print the HImode name of the register.
13853 k -- likewise, print the SImode name of the register.
13854 q -- likewise, print the DImode name of the register.
13855 x -- likewise, print the V4SFmode name of the register.
13856 t -- likewise, print the V8SFmode name of the register.
13857 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13858 y -- print "st(0)" instead of "st" as a register.
13859 d -- print duplicated register operand for AVX instruction.
13860 D -- print condition for SSE cmp instruction.
13861 P -- if PIC, print an @PLT suffix.
13862 p -- print raw symbol name.
13863 X -- don't print any sort of PIC '@' suffix for a symbol.
13864 & -- print some in-use local-dynamic symbol name.
13865 H -- print a memory address offset by 8; used for sse high-parts
13866 Y -- print condition for XOP pcom* instruction.
13867 + -- print a branch hint as 'cs' or 'ds' prefix
13868 ; -- print a semicolon (after prefixes, due to a bug in older gas).
13869 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13870 @ -- print a segment register of thread base pointer load
13874 ix86_print_operand (FILE *file, rtx x, int code)
13881 if (ASSEMBLER_DIALECT == ASM_ATT)
13887 const char *name = get_some_local_dynamic_name ();
13889 output_operand_lossage ("'%%&' used without any "
13890 "local dynamic TLS references");
13892 assemble_name (file, name);
13897 switch (ASSEMBLER_DIALECT)
13904 /* Intel syntax. For absolute addresses, registers should not
13905 be surrounded by braces. */
13909 ix86_print_operand (file, x, 0);
13916 gcc_unreachable ();
13919 ix86_print_operand (file, x, 0);
13923 /* Wrap address in an UNSPEC to declare special handling. */
13925 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
13927 output_address (x);
13931 if (ASSEMBLER_DIALECT == ASM_ATT)
13936 if (ASSEMBLER_DIALECT == ASM_ATT)
13941 if (ASSEMBLER_DIALECT == ASM_ATT)
13946 if (ASSEMBLER_DIALECT == ASM_ATT)
13951 if (ASSEMBLER_DIALECT == ASM_ATT)
13956 if (ASSEMBLER_DIALECT == ASM_ATT)
13961 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13963 /* Opcodes don't get size suffixes if using Intel opcodes. */
13964 if (ASSEMBLER_DIALECT == ASM_INTEL)
13967 switch (GET_MODE_SIZE (GET_MODE (x)))
13986 output_operand_lossage
13987 ("invalid operand size for operand code '%c'", code);
13992 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13994 (0, "non-integer operand used with operand code '%c'", code);
13998 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13999 if (ASSEMBLER_DIALECT == ASM_INTEL)
14002 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14004 switch (GET_MODE_SIZE (GET_MODE (x)))
14007 #ifdef HAVE_AS_IX86_FILDS
14017 #ifdef HAVE_AS_IX86_FILDQ
14020 fputs ("ll", file);
14028 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14030 /* 387 opcodes don't get size suffixes
14031 if the operands are registers. */
14032 if (STACK_REG_P (x))
14035 switch (GET_MODE_SIZE (GET_MODE (x)))
14056 output_operand_lossage
14057 ("invalid operand type used with operand code '%c'", code);
14061 output_operand_lossage
14062 ("invalid operand size for operand code '%c'", code);
14080 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14082 ix86_print_operand (file, x, 0);
14083 fputs (", ", file);
14088 /* A little bit of braindamage here. The SSE compare instructions
14089 use completely different names for the comparisons than the
14090 fp conditional moves do. */
14093 switch (GET_CODE (x))
14096 fputs ("eq", file);
14099 fputs ("eq_us", file);
14102 fputs ("lt", file);
14105 fputs ("nge", file);
14108 fputs ("le", file);
14111 fputs ("ngt", file);
14114 fputs ("unord", file);
14117 fputs ("neq", file);
14120 fputs ("neq_oq", file);
14123 fputs ("ge", file);
14126 fputs ("nlt", file);
14129 fputs ("gt", file);
14132 fputs ("nle", file);
14135 fputs ("ord", file);
14138 output_operand_lossage ("operand is not a condition code, "
14139 "invalid operand code 'D'");
14145 switch (GET_CODE (x))
14149 fputs ("eq", file);
14153 fputs ("lt", file);
14157 fputs ("le", file);
14160 fputs ("unord", file);
14164 fputs ("neq", file);
14168 fputs ("nlt", file);
14172 fputs ("nle", file);
14175 fputs ("ord", file);
14178 output_operand_lossage ("operand is not a condition code, "
14179 "invalid operand code 'D'");
14185 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14186 if (ASSEMBLER_DIALECT == ASM_ATT)
14188 switch (GET_MODE (x))
14190 case HImode: putc ('w', file); break;
14192 case SFmode: putc ('l', file); break;
14194 case DFmode: putc ('q', file); break;
14195 default: gcc_unreachable ();
14202 if (!COMPARISON_P (x))
14204 output_operand_lossage ("operand is neither a constant nor a "
14205 "condition code, invalid operand code "
14209 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14212 if (!COMPARISON_P (x))
14214 output_operand_lossage ("operand is neither a constant nor a "
14215 "condition code, invalid operand code "
14219 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14220 if (ASSEMBLER_DIALECT == ASM_ATT)
14223 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14226 /* Like above, but reverse condition */
14228 /* Check to see if argument to %c is really a constant
14229 and not a condition code which needs to be reversed. */
14230 if (!COMPARISON_P (x))
14232 output_operand_lossage ("operand is neither a constant nor a "
14233 "condition code, invalid operand "
14237 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14240 if (!COMPARISON_P (x))
14242 output_operand_lossage ("operand is neither a constant nor a "
14243 "condition code, invalid operand "
14247 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14248 if (ASSEMBLER_DIALECT == ASM_ATT)
14251 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14255 if (!offsettable_memref_p (x))
14257 output_operand_lossage ("operand is not an offsettable memory "
14258 "reference, invalid operand "
14262 /* It doesn't actually matter what mode we use here, as we're
14263 only going to use this for printing. */
14264 x = adjust_address_nv (x, DImode, 8);
14272 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14275 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14278 int pred_val = INTVAL (XEXP (x, 0));
14280 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14281 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14283 int taken = pred_val > REG_BR_PROB_BASE / 2;
14284 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14286 /* Emit hints only in cases where the default branch prediction
14287 heuristics would fail. */
14288 if (taken != cputaken)
14290 /* We use 3e (DS) prefix for taken branches and
14291 2e (CS) prefix for not taken branches. */
14293 fputs ("ds ; ", file);
14295 fputs ("cs ; ", file);
14303 switch (GET_CODE (x))
14306 fputs ("neq", file);
14309 fputs ("eq", file);
14313 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14317 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14321 fputs ("le", file);
14325 fputs ("lt", file);
14328 fputs ("unord", file);
14331 fputs ("ord", file);
14334 fputs ("ueq", file);
14337 fputs ("nlt", file);
14340 fputs ("nle", file);
14343 fputs ("ule", file);
14346 fputs ("ult", file);
14349 fputs ("une", file);
14352 output_operand_lossage ("operand is not a condition code, "
14353 "invalid operand code 'Y'");
14359 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14365 if (ASSEMBLER_DIALECT == ASM_ATT)
14368 /* The kernel uses a different segment register for performance
14369 reasons; a system call would not have to trash the userspace
14370 segment register, which would be expensive. */
14371 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14372 fputs ("fs", file);
14374 fputs ("gs", file);
14378 putc (TARGET_AVX2 ? 'i' : 'f', file);
14382 output_operand_lossage ("invalid operand code '%c'", code);
14387 print_reg (x, code, file);
14389 else if (MEM_P (x))
14391 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14392 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14393 && GET_MODE (x) != BLKmode)
14396 switch (GET_MODE_SIZE (GET_MODE (x)))
14398 case 1: size = "BYTE"; break;
14399 case 2: size = "WORD"; break;
14400 case 4: size = "DWORD"; break;
14401 case 8: size = "QWORD"; break;
14402 case 12: size = "TBYTE"; break;
14404 if (GET_MODE (x) == XFmode)
14409 case 32: size = "YMMWORD"; break;
14411 gcc_unreachable ();
14414 /* Check for explicit size override (codes 'b', 'w', 'k', 'q' and 'x'). */
14418 else if (code == 'w')
14420 else if (code == 'k')
14422 else if (code == 'q')
14424 else if (code == 'x')
14427 fputs (size, file);
14428 fputs (" PTR ", file);
14432 /* Avoid (%rip) for call operands. */
14433 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14434 && !CONST_INT_P (x))
14435 output_addr_const (file, x);
14436 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14437 output_operand_lossage ("invalid constraints for operand");
14439 output_address (x);
14442 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14447 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14448 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14450 if (ASSEMBLER_DIALECT == ASM_ATT)
14452 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14454 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14456 fprintf (file, "0x%08x", (unsigned int) l);
14459 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14464 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14465 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14467 if (ASSEMBLER_DIALECT == ASM_ATT)
14469 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14472 /* These float cases don't actually occur as immediate operands. */
14473 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14477 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14478 fputs (dstr, file);
14483 /* We have patterns that allow zero sets of memory, for instance.
14484 In 64-bit mode, we should probably support all 8-byte vectors,
14485 since we can in fact encode that into an immediate. */
14486 if (GET_CODE (x) == CONST_VECTOR)
14488 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14492 if (code != 'P' && code != 'p')
14494 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14496 if (ASSEMBLER_DIALECT == ASM_ATT)
14499 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14500 || GET_CODE (x) == LABEL_REF)
14502 if (ASSEMBLER_DIALECT == ASM_ATT)
14505 fputs ("OFFSET FLAT:", file);
14508 if (CONST_INT_P (x))
14509 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14510 else if (flag_pic || MACHOPIC_INDIRECT)
14511 output_pic_addr_const (file, x, code);
14513 output_addr_const (file, x);
14518 ix86_print_operand_punct_valid_p (unsigned char code)
14520 return (code == '@' || code == '*' || code == '+'
14521 || code == '&' || code == ';' || code == '~');
14524 /* Print a memory operand whose address is ADDR. */
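/* For reference (standard notation, not new behavior): the same
   address prints as disp(%base,%index,scale) in AT&T syntax and as
   [base+index*scale+disp] in Intel syntax; the branches below differ
   mainly in that punctuation.  */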
14527 ix86_print_operand_address (FILE *file, rtx addr)
14529 struct ix86_address parts;
14530 rtx base, index, disp;
14536 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14538 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14539 gcc_assert (parts.index == NULL_RTX);
14540 parts.index = XVECEXP (addr, 0, 1);
14541 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14542 addr = XVECEXP (addr, 0, 0);
14545 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
14547 gcc_assert (TARGET_64BIT);
14548 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14552 ok = ix86_decompose_address (addr, &parts);
14556 if (parts.base && GET_CODE (parts.base) == SUBREG)
14558 rtx tmp = SUBREG_REG (parts.base);
14559 parts.base = simplify_subreg (GET_MODE (parts.base),
14560 tmp, GET_MODE (tmp), 0);
14561 gcc_assert (parts.base != NULL_RTX);
14564 if (parts.index && GET_CODE (parts.index) == SUBREG)
14566 rtx tmp = SUBREG_REG (parts.index);
14567 parts.index = simplify_subreg (GET_MODE (parts.index),
14568 tmp, GET_MODE (tmp), 0);
14569 gcc_assert (parts.index != NULL_RTX);
14573 index = parts.index;
14575 scale = parts.scale;
14583 if (ASSEMBLER_DIALECT == ASM_ATT)
14585 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14588 gcc_unreachable ();
14591 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14592 if (TARGET_64BIT && !base && !index)
14596 if (GET_CODE (disp) == CONST
14597 && GET_CODE (XEXP (disp, 0)) == PLUS
14598 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14599 symbol = XEXP (XEXP (disp, 0), 0);
14601 if (GET_CODE (symbol) == LABEL_REF
14602 || (GET_CODE (symbol) == SYMBOL_REF
14603 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14606 if (!base && !index)
14608 /* A displacement-only address requires special attention. */
14610 if (CONST_INT_P (disp))
14612 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14613 fputs ("ds:", file);
14614 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14617 output_pic_addr_const (file, disp, 0);
14619 output_addr_const (file, disp);
14623 /* Print SImode register names to force addr32 prefix. */
14624 if (GET_CODE (addr) == SUBREG)
14626 gcc_assert (TARGET_64BIT);
14627 gcc_assert (GET_MODE (addr) == SImode);
14628 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
14629 gcc_assert (!code);
14632 else if (GET_CODE (addr) == ZERO_EXTEND
14633 || GET_CODE (addr) == AND)
14635 gcc_assert (TARGET_64BIT);
14636 gcc_assert (GET_MODE (addr) == DImode);
14637 gcc_assert (!code);
14641 if (ASSEMBLER_DIALECT == ASM_ATT)
14646 output_pic_addr_const (file, disp, 0);
14647 else if (GET_CODE (disp) == LABEL_REF)
14648 output_asm_label (disp);
14650 output_addr_const (file, disp);
14655 print_reg (base, code, file);
14659 print_reg (index, vsib ? 0 : code, file);
14660 if (scale != 1 || vsib)
14661 fprintf (file, ",%d", scale);
14667 rtx offset = NULL_RTX;
14671 /* Pull out the offset of a symbol; print any symbol itself. */
14672 if (GET_CODE (disp) == CONST
14673 && GET_CODE (XEXP (disp, 0)) == PLUS
14674 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14676 offset = XEXP (XEXP (disp, 0), 1);
14677 disp = gen_rtx_CONST (VOIDmode,
14678 XEXP (XEXP (disp, 0), 0));
14682 output_pic_addr_const (file, disp, 0);
14683 else if (GET_CODE (disp) == LABEL_REF)
14684 output_asm_label (disp);
14685 else if (CONST_INT_P (disp))
14688 output_addr_const (file, disp);
14694 print_reg (base, code, file);
14697 if (INTVAL (offset) >= 0)
14699 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14703 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14710 print_reg (index, vsib ? 0 : code, file);
14711 if (scale != 1 || vsib)
14712 fprintf (file, "*%d", scale);
14719 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14722 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14726 if (GET_CODE (x) != UNSPEC)
14729 op = XVECEXP (x, 0, 0);
14730 switch (XINT (x, 1))
14732 case UNSPEC_GOTTPOFF:
14733 output_addr_const (file, op);
14734 /* FIXME: This might be @TPOFF in Sun ld. */
14735 fputs ("@gottpoff", file);
14738 output_addr_const (file, op);
14739 fputs ("@tpoff", file);
14741 case UNSPEC_NTPOFF:
14742 output_addr_const (file, op);
14744 fputs ("@tpoff", file);
14746 fputs ("@ntpoff", file);
14748 case UNSPEC_DTPOFF:
14749 output_addr_const (file, op);
14750 fputs ("@dtpoff", file);
14752 case UNSPEC_GOTNTPOFF:
14753 output_addr_const (file, op);
14755 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14756 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14758 fputs ("@gotntpoff", file);
14760 case UNSPEC_INDNTPOFF:
14761 output_addr_const (file, op);
14762 fputs ("@indntpoff", file);
14765 case UNSPEC_MACHOPIC_OFFSET:
14766 output_addr_const (file, op);
14768 machopic_output_function_base_name (file);
14772 case UNSPEC_STACK_CHECK:
14776 gcc_assert (flag_split_stack);
14778 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14779 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14781 gcc_unreachable ();
14784 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14795 /* Split one or more double-mode RTL references into pairs of half-mode
14796 references. The RTL can be REG, offsettable MEM, integer constant, or
14797 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14798 split and "num" is its length. lo_half and hi_half are output arrays
14799 that parallel "operands". */
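/* A minimal sketch of the splitting, assuming little-endian layout: a
   DImode register is split into two SImode subregs at byte offsets 0
   (low half) and 4 (high half), and an offsettable DImode MEM at
   address A yields SImode MEMs at A and A+4.  */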
14802 split_double_mode (enum machine_mode mode, rtx operands[],
14803 int num, rtx lo_half[], rtx hi_half[])
14805 enum machine_mode half_mode;
14811 half_mode = DImode;
14814 half_mode = SImode;
14817 gcc_unreachable ();
14820 byte = GET_MODE_SIZE (half_mode);
14824 rtx op = operands[num];
14826 /* simplify_subreg refuses to split volatile memory addresses,
14827 but we still have to handle them. */
14830 lo_half[num] = adjust_address (op, half_mode, 0);
14831 hi_half[num] = adjust_address (op, half_mode, byte);
14835 lo_half[num] = simplify_gen_subreg (half_mode, op,
14836 GET_MODE (op) == VOIDmode
14837 ? mode : GET_MODE (op), 0);
14838 hi_half[num] = simplify_gen_subreg (half_mode, op,
14839 GET_MODE (op) == VOIDmode
14840 ? mode : GET_MODE (op), byte);
14845 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14846 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14847 is the expression of the binary operation. The output may either be
14848 emitted here, or returned to the caller, like all output_* functions.
14850 There is no guarantee that the operands are the same mode, as they
14851 might be within FLOAT or FLOAT_EXTEND expressions. */
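/* Illustrative output (assumed operand layout): for operands[3] being
   a PLUS with both inputs in stack registers, this returns a template
   such as "fadd\t{%y2, %0|%0, %y2}", using the usual {AT&T|Intel}
   dual-syntax convention seen in the cases below.  */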
14853 #ifndef SYSV386_COMPAT
14854 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14855 wants to fix the assemblers because that causes incompatibility
14856 with gcc. No-one wants to fix gcc because that causes
14857 incompatibility with assemblers... You can use the option of
14858 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14859 #define SYSV386_COMPAT 1
14863 output_387_binary_op (rtx insn, rtx *operands)
14865 static char buf[40];
14868 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14870 #ifdef ENABLE_CHECKING
14871 /* Even if we do not want to check the inputs, this documents input
14872 constraints, which helps in understanding the following code. */
14873 if (STACK_REG_P (operands[0])
14874 && ((REG_P (operands[1])
14875 && REGNO (operands[0]) == REGNO (operands[1])
14876 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14877 || (REG_P (operands[2])
14878 && REGNO (operands[0]) == REGNO (operands[2])
14879 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14880 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14883 gcc_assert (is_sse);
14886 switch (GET_CODE (operands[3]))
14889 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14890 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14898 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14899 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14907 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14908 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14916 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14917 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14925 gcc_unreachable ();
14932 strcpy (buf, ssep);
14933 if (GET_MODE (operands[0]) == SFmode)
14934 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14936 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14940 strcpy (buf, ssep + 1);
14941 if (GET_MODE (operands[0]) == SFmode)
14942 strcat (buf, "ss\t{%2, %0|%0, %2}");
14944 strcat (buf, "sd\t{%2, %0|%0, %2}");
14950 switch (GET_CODE (operands[3]))
14954 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14956 rtx temp = operands[2];
14957 operands[2] = operands[1];
14958 operands[1] = temp;
14961 /* We now know operands[0] == operands[1]. */
14963 if (MEM_P (operands[2]))
14969 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14971 if (STACK_TOP_P (operands[0]))
14972 /* How is it that we are storing to a dead operand[2]?
14973 Well, presumably operands[1] is dead too. We can't
14974 store the result to st(0) as st(0) gets popped on this
14975 instruction. Instead store to operands[2] (which I
14976 think has to be st(1)). st(1) will be popped later.
14977 gcc <= 2.8.1 didn't have this check and generated
14978 assembly code that the Unixware assembler rejected. */
14979 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14981 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14985 if (STACK_TOP_P (operands[0]))
14986 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14988 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14993 if (MEM_P (operands[1]))
14999 if (MEM_P (operands[2]))
15005 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15008 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15009 derived assemblers, confusingly reverse the direction of
15010 the operation for fsub{r} and fdiv{r} when the
15011 destination register is not st(0). The Intel assembler
15012 doesn't have this brain damage. Read !SYSV386_COMPAT to
15013 figure out what the hardware really does. */
15014 if (STACK_TOP_P (operands[0]))
15015 p = "{p\t%0, %2|rp\t%2, %0}";
15017 p = "{rp\t%2, %0|p\t%0, %2}";
15019 if (STACK_TOP_P (operands[0]))
15020 /* As above for fmul/fadd, we can't store to st(0). */
15021 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15023 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15028 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15031 if (STACK_TOP_P (operands[0]))
15032 p = "{rp\t%0, %1|p\t%1, %0}";
15034 p = "{p\t%1, %0|rp\t%0, %1}";
15036 if (STACK_TOP_P (operands[0]))
15037 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15039 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15044 if (STACK_TOP_P (operands[0]))
15046 if (STACK_TOP_P (operands[1]))
15047 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15049 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15052 else if (STACK_TOP_P (operands[1]))
15055 p = "{\t%1, %0|r\t%0, %1}";
15057 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15063 p = "{r\t%2, %0|\t%0, %2}";
15065 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15071 gcc_unreachable ();
15078 /* Return needed mode for entity in optimize_mode_switching pass. */
15081 ix86_mode_needed (int entity, rtx insn)
15083 enum attr_i387_cw mode;
15085 /* The mode UNINITIALIZED is used to store the control word after a
15086 function call or ASM pattern. The mode ANY specifies that the function
15087 has no requirements on the control word and makes no changes in the
15088 bits we are interested in. */
15091 || (NONJUMP_INSN_P (insn)
15092 && (asm_noperands (PATTERN (insn)) >= 0
15093 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15094 return I387_CW_UNINITIALIZED;
15096 if (recog_memoized (insn) < 0)
15097 return I387_CW_ANY;
15099 mode = get_attr_i387_cw (insn);
15104 if (mode == I387_CW_TRUNC)
15109 if (mode == I387_CW_FLOOR)
15114 if (mode == I387_CW_CEIL)
15119 if (mode == I387_CW_MASK_PM)
15124 gcc_unreachable ();
15127 return I387_CW_ANY;
15130 /* Output code to initialize control word copies used by trunc?f?i and
15131 rounding patterns. CURRENT_MODE is set to the current control word,
15132 while NEW_MODE is set to the new control word. */
15135 emit_i387_cw_initialization (int mode)
15137 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15140 enum ix86_stack_slot slot;
15142 rtx reg = gen_reg_rtx (HImode);
15144 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15145 emit_move_insn (reg, copy_rtx (stored_mode));
15147 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15148 || optimize_function_for_size_p (cfun))
15152 case I387_CW_TRUNC:
15153 /* round toward zero (truncate) */
15154 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15155 slot = SLOT_CW_TRUNC;
15158 case I387_CW_FLOOR:
15159 /* round down toward -oo */
15160 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15161 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15162 slot = SLOT_CW_FLOOR;
15166 /* round up toward +oo */
15167 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15168 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15169 slot = SLOT_CW_CEIL;
15172 case I387_CW_MASK_PM:
15173 /* mask precision exception for nearbyint() */
15174 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15175 slot = SLOT_CW_MASK_PM;
15179 gcc_unreachable ();
15186 case I387_CW_TRUNC:
15187 /* round toward zero (truncate) */
15188 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15189 slot = SLOT_CW_TRUNC;
15192 case I387_CW_FLOOR:
15193 /* round down toward -oo */
15194 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15195 slot = SLOT_CW_FLOOR;
15199 /* round up toward +oo */
15200 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15201 slot = SLOT_CW_CEIL;
15204 case I387_CW_MASK_PM:
15205 /* mask precision exception for nearbyint() */
15206 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15207 slot = SLOT_CW_MASK_PM;
15211 gcc_unreachable ();
15215 gcc_assert (slot < MAX_386_STACK_LOCALS);
15217 new_mode = assign_386_stack_local (HImode, slot);
15218 emit_move_insn (new_mode, reg);
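/* A minimal user-space sketch (not part of GCC) of the control-word
   edits emitted above, assuming an x87 target and GNU inline asm.  The
   rounding-control field is CW bits 10-11 (mask 0x0c00): 01 rounds
   toward -oo, 10 toward +oo, 11 toward zero; bit 5 (0x0020) masks the
   precision exception for nearbyint():

     unsigned short cw, cw_trunc;
     __asm__ __volatile__ ("fnstcw %0" : "=m" (cw));   // save current CW
     cw_trunc = cw | 0x0c00;                           // RC = 11: truncate
     __asm__ __volatile__ ("fldcw %0" : : "m" (cw_trunc));
     // ...operations that need round-toward-zero...
     __asm__ __volatile__ ("fldcw %0" : : "m" (cw));   // restore CW
*/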
15221 /* Output code for INSN to convert a float to a signed int. OPERANDS
15222 are the insn operands. The output may be [HSD]Imode and the input
15223 operand may be [SDX]Fmode. */
15226 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
15228 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15229 int dimode_p = GET_MODE (operands[0]) == DImode;
15230 int round_mode = get_attr_i387_cw (insn);
15232 /* Jump through a hoop or two for DImode, since the hardware has no
15233 non-popping instruction. We used to do this a different way, but
15234 that was somewhat fragile and broke with post-reload splitters. */
15235 if ((dimode_p || fisttp) && !stack_top_dies)
15236 output_asm_insn ("fld\t%y1", operands);
15238 gcc_assert (STACK_TOP_P (operands[1]));
15239 gcc_assert (MEM_P (operands[0]));
15240 gcc_assert (GET_MODE (operands[1]) != TFmode);
15243 output_asm_insn ("fisttp%Z0\t%0", operands);
15246 if (round_mode != I387_CW_ANY)
15247 output_asm_insn ("fldcw\t%3", operands);
15248 if (stack_top_dies || dimode_p)
15249 output_asm_insn ("fistp%Z0\t%0", operands);
15251 output_asm_insn ("fist%Z0\t%0", operands);
15252 if (round_mode != I387_CW_ANY)
15253 output_asm_insn ("fldcw\t%2", operands);
15259 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15260 have the values zero or one, indicates the ffreep insn's operand
15261 from the OPERANDS array. */
15263 static const char *
15264 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15266 if (TARGET_USE_FFREEP)
15267 #ifdef HAVE_AS_IX86_FFREEP
15268 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15271 static char retval[32];
15272 int regno = REGNO (operands[opno]);
15274 gcc_assert (FP_REGNO_P (regno));
15276 regno -= FIRST_STACK_REG;
15278 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15283 return opno ? "fstp\t%y1" : "fstp\t%y0";
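/* Encoding note (an illustration, not from the original sources): the
   hand-assembled fallback above relies on "ffreep %st(N)" encoding as
   the two bytes 0xdf 0xc0+N.  Emitted as a little-endian 16-bit word,
   e.g. for N = 3:

	.word 0xc3df		# bytes df c3 == ffreep %st(3)

   hence the ASM_SHORT "0xc%ddf" format string with the stack-relative
   register number printed into the middle nibble.  */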
15287 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15288 should be used. UNORDERED_P is true when fucom should be used. */
15291 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
15293 int stack_top_dies;
15294 rtx cmp_op0, cmp_op1;
15295 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15299 cmp_op0 = operands[0];
15300 cmp_op1 = operands[1];
15304 cmp_op0 = operands[1];
15305 cmp_op1 = operands[2];
15310 if (GET_MODE (operands[0]) == SFmode)
15312 return "%vucomiss\t{%1, %0|%0, %1}";
15314 return "%vcomiss\t{%1, %0|%0, %1}";
15317 return "%vucomisd\t{%1, %0|%0, %1}";
15319 return "%vcomisd\t{%1, %0|%0, %1}";
15322 gcc_assert (STACK_TOP_P (cmp_op0));
15324 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15326 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15328 if (stack_top_dies)
15330 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15331 return output_387_ffreep (operands, 1);
15334 return "ftst\n\tfnstsw\t%0";
15337 if (STACK_REG_P (cmp_op1)
15339 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15340 && REGNO (cmp_op1) != FIRST_STACK_REG)
15342 /* If both the top of the 387 stack and the other operand (also a
15343 stack register) die, then this must be a
15344 `fcompp' float compare. */
15348 /* There is no double popping fcomi variant. Fortunately,
15349 eflags is immune from the fstp's cc clobbering. */
15351 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15353 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15354 return output_387_ffreep (operands, 0);
15359 return "fucompp\n\tfnstsw\t%0";
15361 return "fcompp\n\tfnstsw\t%0";
15366 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15368 static const char * const alt[16] =
15370 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15371 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15372 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15373 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15375 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15376 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15380 "fcomi\t{%y1, %0|%0, %y1}",
15381 "fcomip\t{%y1, %0|%0, %y1}",
15382 "fucomi\t{%y1, %0|%0, %y1}",
15383 "fucomip\t{%y1, %0|%0, %y1}",
15394 mask = eflags_p << 3;
15395 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15396 mask |= unordered_p << 1;
15397 mask |= stack_top_dies;
15399 gcc_assert (mask < 16);
15408 ix86_output_addr_vec_elt (FILE *file, int value)
15410 const char *directive = ASM_LONG;
15414 directive = ASM_QUAD;
15416 gcc_assert (!TARGET_64BIT);
15419 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15423 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15425 const char *directive = ASM_LONG;
15428 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15429 directive = ASM_QUAD;
15431 gcc_assert (!TARGET_64BIT);
15433 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15434 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15435 fprintf (file, "%s%s%d-%s%d\n",
15436 directive, LPREFIX, value, LPREFIX, rel);
15437 else if (HAVE_AS_GOTOFF_IN_DATA)
15438 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15440 else if (TARGET_MACHO)
15442 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15443 machopic_output_function_base_name (file);
15448 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15449 GOT_SYMBOL_NAME, LPREFIX, value);
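/* Sample output for value 3 relative to label 2 (illustrative,
   abbreviated; LPREFIX is ".L" on typical ELF targets):

	.quad	.L3-.L2			# 64-bit, CASE_VECTOR_MODE == DImode
	.long	.L3-.L2			# x32 / VxWorks RTP branch
	.long	.L3@GOTOFF		# 32-bit pic, HAVE_AS_GOTOFF_IN_DATA
	.long	_GLOBAL_OFFSET_TABLE_+[.-.L3]	# final fallback branch
*/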
15452 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15453 for the target. */
15456 ix86_expand_clear (rtx dest)
15460 /* We play register width games, which are only valid after reload. */
15461 gcc_assert (reload_completed);
15463 /* Avoid HImode and its attendant prefix byte. */
15464 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15465 dest = gen_rtx_REG (SImode, REGNO (dest));
15466 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15468 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15469 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15471 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15472 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
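/* Rough shape of the two spellings this expands to (illustrative):

	xorl	%eax, %eax	# shorter and breaks dependencies, but
				# clobbers the flags -- hence the CLOBBER
				# wrapped into the PARALLEL above
	movl	$0, %eax	# TARGET_USE_MOV0 alternative that leaves
				# the flags untouched
*/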
15478 /* X is an unchanging MEM. If it is a constant pool reference, return
15479 the constant pool rtx, else NULL. */
15482 maybe_get_pool_constant (rtx x)
15484 x = ix86_delegitimize_address (XEXP (x, 0));
15486 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15487 return get_pool_constant (x);
15493 ix86_expand_move (enum machine_mode mode, rtx operands[])
15496 enum tls_model model;
15501 if (GET_CODE (op1) == SYMBOL_REF)
15503 model = SYMBOL_REF_TLS_MODEL (op1);
15506 op1 = legitimize_tls_address (op1, model, true);
15507 op1 = force_operand (op1, op0);
15510 if (GET_MODE (op1) != mode)
15511 op1 = convert_to_mode (mode, op1, 1);
15513 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15514 && SYMBOL_REF_DLLIMPORT_P (op1))
15515 op1 = legitimize_dllimport_symbol (op1, false);
15517 else if (GET_CODE (op1) == CONST
15518 && GET_CODE (XEXP (op1, 0)) == PLUS
15519 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15521 rtx addend = XEXP (XEXP (op1, 0), 1);
15522 rtx symbol = XEXP (XEXP (op1, 0), 0);
15525 model = SYMBOL_REF_TLS_MODEL (symbol);
15527 tmp = legitimize_tls_address (symbol, model, true);
15528 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15529 && SYMBOL_REF_DLLIMPORT_P (symbol))
15530 tmp = legitimize_dllimport_symbol (symbol, true);
15534 tmp = force_operand (tmp, NULL);
15535 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15536 op0, 1, OPTAB_DIRECT);
15539 if (GET_MODE (tmp) != mode)
15540 op1 = convert_to_mode (mode, tmp, 1);
15544 if ((flag_pic || MACHOPIC_INDIRECT)
15545 && symbolic_operand (op1, mode))
15547 if (TARGET_MACHO && !TARGET_64BIT)
15550 /* dynamic-no-pic */
15551 if (MACHOPIC_INDIRECT)
15553 rtx temp = ((reload_in_progress
15554 || ((op0 && REG_P (op0))
15556 ? op0 : gen_reg_rtx (Pmode));
15557 op1 = machopic_indirect_data_reference (op1, temp);
15559 op1 = machopic_legitimize_pic_address (op1, mode,
15560 temp == op1 ? 0 : temp);
15562 if (op0 != op1 && GET_CODE (op0) != MEM)
15564 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15568 if (GET_CODE (op0) == MEM)
15569 op1 = force_reg (Pmode, op1);
15573 if (GET_CODE (temp) != REG)
15574 temp = gen_reg_rtx (Pmode);
15575 temp = legitimize_pic_address (op1, temp);
15580 /* dynamic-no-pic */
15586 op1 = force_reg (mode, op1);
15587 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15589 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15590 op1 = legitimize_pic_address (op1, reg);
15593 if (GET_MODE (op1) != mode)
15594 op1 = convert_to_mode (mode, op1, 1);
15601 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15602 || !push_operand (op0, mode))
15604 op1 = force_reg (mode, op1);
15606 if (push_operand (op0, mode)
15607 && ! general_no_elim_operand (op1, mode))
15608 op1 = copy_to_mode_reg (mode, op1);
15610 /* Force large constants in 64bit compilation into a register
15611 to get them CSEed. */
15612 if (can_create_pseudo_p ()
15613 && (mode == DImode) && TARGET_64BIT
15614 && immediate_operand (op1, mode)
15615 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15616 && !register_operand (op0, mode)
15618 op1 = copy_to_mode_reg (mode, op1);
15620 if (can_create_pseudo_p ()
15621 && FLOAT_MODE_P (mode)
15622 && GET_CODE (op1) == CONST_DOUBLE)
15624 /* If we are loading a floating point constant to a register,
15625 force the value to memory now, since we'll get better code
15626 out the back end. */
15628 op1 = validize_mem (force_const_mem (mode, op1));
15629 if (!register_operand (op0, mode))
15631 rtx temp = gen_reg_rtx (mode);
15632 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15633 emit_move_insn (op0, temp);
15639 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15643 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15645 rtx op0 = operands[0], op1 = operands[1];
15646 unsigned int align = GET_MODE_ALIGNMENT (mode);
15648 /* Force constants other than zero into memory. We do not know how
15649 the instructions used to build constants modify the upper 64 bits
15650 of the register; once we have that information, we may be able
15651 to handle some of them more efficiently. */
15652 if (can_create_pseudo_p ()
15653 && register_operand (op0, mode)
15654 && (CONSTANT_P (op1)
15655 || (GET_CODE (op1) == SUBREG
15656 && CONSTANT_P (SUBREG_REG (op1))))
15657 && !standard_sse_constant_p (op1))
15658 op1 = validize_mem (force_const_mem (mode, op1));
15660 /* We need to check memory alignment for SSE mode since an attribute
15661 can make operands unaligned. */
15662 if (can_create_pseudo_p ()
15663 && SSE_REG_MODE_P (mode)
15664 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15665 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15669 /* ix86_expand_vector_move_misalign() does not like constants ... */
15670 if (CONSTANT_P (op1)
15671 || (GET_CODE (op1) == SUBREG
15672 && CONSTANT_P (SUBREG_REG (op1))))
15673 op1 = validize_mem (force_const_mem (mode, op1));
15675 /* ... nor both arguments in memory. */
15676 if (!register_operand (op0, mode)
15677 && !register_operand (op1, mode))
15678 op1 = force_reg (mode, op1);
15680 tmp[0] = op0; tmp[1] = op1;
15681 ix86_expand_vector_move_misalign (mode, tmp);
15685 /* Make operand1 a register if it isn't already. */
15686 if (can_create_pseudo_p ()
15687 && !register_operand (op0, mode)
15688 && !register_operand (op1, mode))
15690 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15694 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15697 /* Split 32-byte AVX unaligned load and store if needed. */
15700 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15703 rtx (*extract) (rtx, rtx, rtx);
15704 rtx (*move_unaligned) (rtx, rtx);
15705 enum machine_mode mode;
15707 switch (GET_MODE (op0))
15710 gcc_unreachable ();
15712 extract = gen_avx_vextractf128v32qi;
15713 move_unaligned = gen_avx_movdqu256;
15717 extract = gen_avx_vextractf128v8sf;
15718 move_unaligned = gen_avx_movups256;
15722 extract = gen_avx_vextractf128v4df;
15723 move_unaligned = gen_avx_movupd256;
15728 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15730 rtx r = gen_reg_rtx (mode);
15731 m = adjust_address (op1, mode, 0);
15732 emit_move_insn (r, m);
15733 m = adjust_address (op1, mode, 16);
15734 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15735 emit_move_insn (op0, r);
15737 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15739 m = adjust_address (op0, mode, 0);
15740 emit_insn (extract (m, op1, const0_rtx));
15741 m = adjust_address (op0, mode, 16);
15742 emit_insn (extract (m, op1, const1_rtx));
15745 emit_insn (move_unaligned (op0, op1));
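/* Rough shape of the two strategies (illustrative, with the address in
   %rax and the data in %ymm0).  Unsplit:

	vmovups	%ymm0, (%rax)			# one 32-byte unaligned move

   Split store (TARGET_AVX256_SPLIT_UNALIGNED_STORE):

	vextractf128	$0x0, %ymm0, (%rax)
	vextractf128	$0x1, %ymm0, 16(%rax)

   Split loads are symmetrical: a 16-byte load plus an insertion of the
   upper half (the VEC_CONCAT built above).  */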
15748 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15749 straight to ix86_expand_vector_move. */
15750 /* Code generation for scalar reg-reg moves of single and double precision data:
15751 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
15755 if (x86_sse_partial_reg_dependency == true)
15760 Code generation for scalar loads of double precision data:
15761 if (x86_sse_split_regs == true)
15762 movlpd mem, reg (gas syntax)
15766 Code generation for unaligned packed loads of single precision data
15767 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15768 if (x86_sse_unaligned_move_optimal)
15771 if (x86_sse_partial_reg_dependency == true)
15783 Code generation for unaligned packed loads of double precision data
15784 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15785 if (x86_sse_unaligned_move_optimal)
15788 if (x86_sse_split_regs == true)
15801 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15810 switch (GET_MODE_CLASS (mode))
15812 case MODE_VECTOR_INT:
15814 switch (GET_MODE_SIZE (mode))
15817 /* If we're optimizing for size, movups is the smallest. */
15818 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15820 op0 = gen_lowpart (V4SFmode, op0);
15821 op1 = gen_lowpart (V4SFmode, op1);
15822 emit_insn (gen_sse_movups (op0, op1));
15825 op0 = gen_lowpart (V16QImode, op0);
15826 op1 = gen_lowpart (V16QImode, op1);
15827 emit_insn (gen_sse2_movdqu (op0, op1));
15830 op0 = gen_lowpart (V32QImode, op0);
15831 op1 = gen_lowpart (V32QImode, op1);
15832 ix86_avx256_split_vector_move_misalign (op0, op1);
15835 gcc_unreachable ();
15838 case MODE_VECTOR_FLOAT:
15839 op0 = gen_lowpart (mode, op0);
15840 op1 = gen_lowpart (mode, op1);
15845 emit_insn (gen_sse_movups (op0, op1));
15848 ix86_avx256_split_vector_move_misalign (op0, op1);
15851 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15853 op0 = gen_lowpart (V4SFmode, op0);
15854 op1 = gen_lowpart (V4SFmode, op1);
15855 emit_insn (gen_sse_movups (op0, op1));
15858 emit_insn (gen_sse2_movupd (op0, op1));
15861 ix86_avx256_split_vector_move_misalign (op0, op1);
15864 gcc_unreachable ();
15869 gcc_unreachable ();
15877 /* If we're optimizing for size, movups is the smallest. */
15878 if (optimize_insn_for_size_p ()
15879 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15881 op0 = gen_lowpart (V4SFmode, op0);
15882 op1 = gen_lowpart (V4SFmode, op1);
15883 emit_insn (gen_sse_movups (op0, op1));
15887 /* ??? If we have typed data, then it would appear that using
15888 movdqu is the only way to get unaligned data loaded with
15889 integer type. */
15890 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15892 op0 = gen_lowpart (V16QImode, op0);
15893 op1 = gen_lowpart (V16QImode, op1);
15894 emit_insn (gen_sse2_movdqu (op0, op1));
15898 if (TARGET_SSE2 && mode == V2DFmode)
15902 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15904 op0 = gen_lowpart (V2DFmode, op0);
15905 op1 = gen_lowpart (V2DFmode, op1);
15906 emit_insn (gen_sse2_movupd (op0, op1));
15910 /* When SSE registers are split into halves, we can avoid
15911 writing to the top half twice. */
15912 if (TARGET_SSE_SPLIT_REGS)
15914 emit_clobber (op0);
15919 /* ??? Not sure about the best option for the Intel chips.
15920 The following would seem to satisfy; the register is
15921 entirely cleared, breaking the dependency chain. We
15922 then store to the upper half, with a dependency depth
15923 of one. A rumor has it that Intel recommends two movsd
15924 followed by an unpacklpd, but this is unconfirmed. And
15925 given that the dependency depth of the unpacklpd would
15926 still be one, I'm not sure why this would be better. */
15927 zero = CONST0_RTX (V2DFmode);
15930 m = adjust_address (op1, DFmode, 0);
15931 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15932 m = adjust_address (op1, DFmode, 8);
15933 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15937 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15939 op0 = gen_lowpart (V4SFmode, op0);
15940 op1 = gen_lowpart (V4SFmode, op1);
15941 emit_insn (gen_sse_movups (op0, op1));
15945 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15946 emit_move_insn (op0, CONST0_RTX (mode));
15948 emit_clobber (op0);
15950 if (mode != V4SFmode)
15951 op0 = gen_lowpart (V4SFmode, op0);
15952 m = adjust_address (op1, V2SFmode, 0);
15953 emit_insn (gen_sse_loadlps (op0, op0, m));
15954 m = adjust_address (op1, V2SFmode, 8);
15955 emit_insn (gen_sse_loadhps (op0, op0, m));
15958 else if (MEM_P (op0))
15960 /* If we're optimizing for size, movups is the smallest. */
15961 if (optimize_insn_for_size_p ()
15962 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15964 op0 = gen_lowpart (V4SFmode, op0);
15965 op1 = gen_lowpart (V4SFmode, op1);
15966 emit_insn (gen_sse_movups (op0, op1));
15970 /* ??? Similar to above, only less clear because of quote
15971 typeless stores unquote. */
15972 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15973 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15975 op0 = gen_lowpart (V16QImode, op0);
15976 op1 = gen_lowpart (V16QImode, op1);
15977 emit_insn (gen_sse2_movdqu (op0, op1));
15981 if (TARGET_SSE2 && mode == V2DFmode)
15983 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15985 op0 = gen_lowpart (V2DFmode, op0);
15986 op1 = gen_lowpart (V2DFmode, op1);
15987 emit_insn (gen_sse2_movupd (op0, op1));
15991 m = adjust_address (op0, DFmode, 0);
15992 emit_insn (gen_sse2_storelpd (m, op1));
15993 m = adjust_address (op0, DFmode, 8);
15994 emit_insn (gen_sse2_storehpd (m, op1));
15999 if (mode != V4SFmode)
16000 op1 = gen_lowpart (V4SFmode, op1);
16002 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
16004 op0 = gen_lowpart (V4SFmode, op0);
16005 emit_insn (gen_sse_movups (op0, op1));
16009 m = adjust_address (op0, V2SFmode, 0);
16010 emit_insn (gen_sse_storelps (m, op1));
16011 m = adjust_address (op0, V2SFmode, 8);
16012 emit_insn (gen_sse_storehps (m, op1));
16017 gcc_unreachable ();
16020 /* Expand a push in MODE. This is some mode for which we do not support
16021 proper push instructions, at least from the registers that we expect
16022 the value to live in. */
16025 ix86_expand_push (enum machine_mode mode, rtx x)
16029 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
16030 GEN_INT (-GET_MODE_SIZE (mode)),
16031 stack_pointer_rtx, 1, OPTAB_DIRECT);
16032 if (tmp != stack_pointer_rtx)
16033 emit_move_insn (stack_pointer_rtx, tmp);
16035 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
16037 /* When we push an operand onto the stack, it has to be aligned at least
16038 at the function argument boundary. However, since we don't have
16039 the argument type, we can't determine the actual argument
16040 boundary. */
16041 emit_move_insn (tmp, x);
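/* Rough shape of the expansion (an illustration, assuming a 16-byte
   mode on x86-64 with the value in %xmm0):

	subq	$16, %rsp		# explicit stack-pointer adjustment
	movups	%xmm0, (%rsp)		# ordinary store into the new slot

   i.e. there is no real push instruction for such operands, so the
   stack pointer is adjusted first and the value stored with a plain
   move.  */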
16044 /* Helper function of ix86_fixup_binary_operands to canonicalize
16045 operand order. Returns true if the operands should be swapped. */
16048 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
16051 rtx dst = operands[0];
16052 rtx src1 = operands[1];
16053 rtx src2 = operands[2];
16055 /* If the operation is not commutative, we can't do anything. */
16056 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
16059 /* Highest priority is that src1 should match dst. */
16060 if (rtx_equal_p (dst, src1))
16062 if (rtx_equal_p (dst, src2))
16065 /* Next highest priority is that immediate constants come second. */
16066 if (immediate_operand (src2, mode))
16068 if (immediate_operand (src1, mode))
16071 /* Lowest priority is that memory references should come second. */
16081 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16082 destination to use for the operation. If different from the true
16083 destination in operands[0], a copy operation will be required. */
16086 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
16089 rtx dst = operands[0];
16090 rtx src1 = operands[1];
16091 rtx src2 = operands[2];
16093 /* Canonicalize operand order. */
16094 if (ix86_swap_binary_operands_p (code, mode, operands))
16098 /* It is invalid to swap operands of different modes. */
16099 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
16106 /* Both source operands cannot be in memory. */
16107 if (MEM_P (src1) && MEM_P (src2))
16109 /* Optimization: Only read from memory once. */
16110 if (rtx_equal_p (src1, src2))
16112 src2 = force_reg (mode, src2);
16116 src2 = force_reg (mode, src2);
16119 /* If the destination is memory, and we do not have matching source
16120 operands, do things in registers. */
16121 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16122 dst = gen_reg_rtx (mode);
16124 /* Source 1 cannot be a constant. */
16125 if (CONSTANT_P (src1))
16126 src1 = force_reg (mode, src1);
16128 /* Source 1 cannot be a non-matching memory. */
16129 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16130 src1 = force_reg (mode, src1);
16132 /* Improve address combine. */
16134 && GET_MODE_CLASS (mode) == MODE_INT
16136 src2 = force_reg (mode, src2);
16138 operands[1] = src1;
16139 operands[2] = src2;
16143 /* Similarly, but assume that the destination has already been
16144 set up properly. */
16147 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
16148 enum machine_mode mode, rtx operands[])
16150 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
16151 gcc_assert (dst == operands[0]);
16154 /* Attempt to expand a binary operator. Make the expansion closer to the
16155 actual machine, than just general_operand, which will allow 3 separate
16156 memory references (one output, two input) in a single insn. */
16159 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
16162 rtx src1, src2, dst, op, clob;
16164 dst = ix86_fixup_binary_operands (code, mode, operands);
16165 src1 = operands[1];
16166 src2 = operands[2];
16168 /* Emit the instruction. */
16170 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
16171 if (reload_in_progress)
16173 /* Reload doesn't know about the flags register, and doesn't know that
16174 it doesn't want to clobber it. We can only do this with PLUS. */
16175 gcc_assert (code == PLUS);
16178 else if (reload_completed
16180 && !rtx_equal_p (dst, src1))
16182 /* This is going to be an LEA; avoid splitting it later. */
16187 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16188 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16191 /* Fix up the destination if needed. */
16192 if (dst != operands[0])
16193 emit_move_insn (operands[0], dst);
16196 /* Return TRUE or FALSE depending on whether the binary operator meets the
16197 appropriate constraints. */
16200 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16203 rtx dst = operands[0];
16204 rtx src1 = operands[1];
16205 rtx src2 = operands[2];
16207 /* Both source operands cannot be in memory. */
16208 if (MEM_P (src1) && MEM_P (src2))
16211 /* Canonicalize operand order for commutative operators. */
16212 if (ix86_swap_binary_operands_p (code, mode, operands))
16219 /* If the destination is memory, we must have a matching source operand. */
16220 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16223 /* Source 1 cannot be a constant. */
16224 if (CONSTANT_P (src1))
16227 /* Source 1 cannot be a non-matching memory. */
16228 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16229 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16230 return (code == AND
16233 || (TARGET_64BIT && mode == DImode))
16234 && satisfies_constraint_L (src2));
16239 /* Attempt to expand a unary operator. Make the expansion closer to the
16240 actual machine, than just general_operand, which will allow 2 separate
16241 memory references (one output, one input) in a single insn. */
16244 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16247 int matching_memory;
16248 rtx src, dst, op, clob;
16253 /* If the destination is memory, and we do not have matching source
16254 operands, do things in registers. */
16255 matching_memory = 0;
16258 if (rtx_equal_p (dst, src))
16259 matching_memory = 1;
16261 dst = gen_reg_rtx (mode);
16264 /* When source operand is memory, destination must match. */
16265 if (MEM_P (src) && !matching_memory)
16266 src = force_reg (mode, src);
16268 /* Emit the instruction. */
16270 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16271 if (reload_in_progress || code == NOT)
16273 /* Reload doesn't know about the flags register, and doesn't know that
16274 it doesn't want to clobber it. */
16275 gcc_assert (code == NOT);
16280 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16281 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16284 /* Fix up the destination if needed. */
16285 if (dst != operands[0])
16286 emit_move_insn (operands[0], dst);
16289 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16290 divisor are within the range [0-255]. */
16293 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16296 rtx end_label, qimode_label;
16297 rtx insn, div, mod;
16298 rtx scratch, tmp0, tmp1, tmp2;
16299 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16300 rtx (*gen_zero_extend) (rtx, rtx);
16301 rtx (*gen_test_ccno_1) (rtx, rtx);
16306 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16307 gen_test_ccno_1 = gen_testsi_ccno_1;
16308 gen_zero_extend = gen_zero_extendqisi2;
16311 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16312 gen_test_ccno_1 = gen_testdi_ccno_1;
16313 gen_zero_extend = gen_zero_extendqidi2;
16316 gcc_unreachable ();
16319 end_label = gen_label_rtx ();
16320 qimode_label = gen_label_rtx ();
16322 scratch = gen_reg_rtx (mode);
16324 /* Use 8bit unsigned divmod if dividend and divisor are within
16325 the range [0-255]. */
16326 emit_move_insn (scratch, operands[2]);
16327 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16328 scratch, 1, OPTAB_DIRECT);
16329 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16330 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16331 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16332 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16333 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16335 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16336 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16337 JUMP_LABEL (insn) = qimode_label;
16339 /* Generate original signed/unsigned divmod. */
16340 div = gen_divmod4_1 (operands[0], operands[1],
16341 operands[2], operands[3]);
16344 /* Branch to the end. */
16345 emit_jump_insn (gen_jump (end_label));
16348 /* Generate 8bit unsigned divide. */
16349 emit_label (qimode_label);
16350 /* Don't use operands[0] for result of 8bit divide since not all
16351 registers support QImode ZERO_EXTRACT. */
16352 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16353 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16354 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16355 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16359 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16360 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16364 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16365 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16368 /* Extract remainder from AH. */
16369 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16370 if (REG_P (operands[1]))
16371 insn = emit_move_insn (operands[1], tmp1);
16374 /* Need a new scratch register since the old one has the result
16375 of the 8bit divide. */
16376 scratch = gen_reg_rtx (mode);
16377 emit_move_insn (scratch, tmp1);
16378 insn = emit_move_insn (operands[1], scratch);
16380 set_unique_reg_note (insn, REG_EQUAL, mod);
16382 /* Zero extend quotient from AL. */
16383 tmp1 = gen_lowpart (QImode, tmp0);
16384 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16385 set_unique_reg_note (insn, REG_EQUAL, div);
16387 emit_label (end_label);
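/* Shape of the emitted code for a 32-bit unsigned divmod (a sketch,
   not literal output; registers and labels invented for illustration):

	movl	%esi, %ecx
	orl	%edi, %ecx		# scratch = dividend | divisor
	testl	$-256, %ecx		# any bits above the low 8 set?
	je	.Lqimode
	# ...ordinary 32-bit div/idiv sequence...
	jmp	.Ldone
   .Lqimode:
	# ...divb: 8-bit quotient lands in %al, remainder in %ah...
   .Ldone:
*/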
16390 #define LEA_MAX_STALL (3)
16391 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16393 /* Increase given DISTANCE in half-cycles according to
16394 dependencies between PREV and NEXT instructions.
16395 Add 1 half-cycle if there is no dependency and
16396 go to the next cycle if there is some dependency. */
16398 static unsigned int
16399 increase_distance (rtx prev, rtx next, unsigned int distance)
16404 if (!prev || !next)
16405 return distance + (distance & 1) + 2;
16407 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
16408 return distance + 1;
16410 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16411 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16412 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
16413 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
16414 return distance + (distance & 1) + 2;
16416 return distance + 1;
16419 /* Function checks if instruction INSN defines register number
16420 REGNO1 or REGNO2. */
16423 insn_defines_reg (unsigned int regno1, unsigned int regno2,
16428 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
16429 if (DF_REF_REG_DEF_P (*def_rec)
16430 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16431 && (regno1 == DF_REF_REGNO (*def_rec)
16432 || regno2 == DF_REF_REGNO (*def_rec)))
16440 /* Function checks if instruction INSN uses register number
16441 REGNO as part of an address expression. */
16444 insn_uses_reg_mem (unsigned int regno, rtx insn)
16448 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
16449 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
16455 /* Search backward for a non-AGU definition of register number REGNO1
16456 or register number REGNO2 in the basic block, starting from instruction
16457 START up to the head of the basic block or instruction INSN.
16459 Sets *FOUND to true if a definition was found and to false otherwise.
16462 The distance in half-cycles between START and the found instruction,
16463 or the head of the BB, is added to DISTANCE and returned. */
16466 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
16467 rtx insn, int distance,
16468 rtx start, bool *found)
16470 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16478 && distance < LEA_SEARCH_THRESHOLD)
16480 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
16482 distance = increase_distance (prev, next, distance);
16483 if (insn_defines_reg (regno1, regno2, prev))
16485 if (recog_memoized (prev) < 0
16486 || get_attr_type (prev) != TYPE_LEA)
16495 if (prev == BB_HEAD (bb))
16498 prev = PREV_INSN (prev);
16504 /* Search backward for non-agu definition of register number REGNO1
16505 or register number REGNO2 in INSN's basic block until
16506 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16507 2. Reach a neighbouring BB boundary, or
16508 3. Reach agu definition.
16509 Returns the distance between the non-agu definition point and INSN.
16510 If no definition point, returns -1. */
16513 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16516 basic_block bb = BLOCK_FOR_INSN (insn);
16518 bool found = false;
16520 if (insn != BB_HEAD (bb))
16521 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16522 distance, PREV_INSN (insn),
16525 if (!found && distance < LEA_SEARCH_THRESHOLD)
16529 bool simple_loop = false;
16531 FOR_EACH_EDGE (e, ei, bb->preds)
16534 simple_loop = true;
16539 distance = distance_non_agu_define_in_bb (regno1, regno2,
16541 BB_END (bb), &found);
16544 int shortest_dist = -1;
16545 bool found_in_bb = false;
16547 FOR_EACH_EDGE (e, ei, bb->preds)
16550 = distance_non_agu_define_in_bb (regno1, regno2,
16556 if (shortest_dist < 0)
16557 shortest_dist = bb_dist;
16558 else if (bb_dist > 0)
16559 shortest_dist = MIN (bb_dist, shortest_dist);
16565 distance = shortest_dist;
16569 /* get_attr_type may modify recog data. We want to make sure
16570 that recog data is valid for instruction INSN, on which
16571 distance_non_agu_define is called. INSN is unchanged here. */
16572 extract_insn_cached (insn);
16577 return distance >> 1;
16580 /* Return the distance in half-cycles between INSN and the next
16581 insn that uses register number REGNO in a memory address, added
16582 to DISTANCE. Return -1 if REGNO0 is set.
16584 Put true value into *FOUND if register usage was found and
16585 false otherwise.
16586 Put true value into *REDEFINED if register redefinition was
16587 found and false otherwise. */
16590 distance_agu_use_in_bb (unsigned int regno,
16591 rtx insn, int distance, rtx start,
16592 bool *found, bool *redefined)
16594 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16599 *redefined = false;
16603 && distance < LEA_SEARCH_THRESHOLD)
16605 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
16607 distance = increase_distance(prev, next, distance);
16608 if (insn_uses_reg_mem (regno, next))
16610 /* Return DISTANCE if OP0 is used in memory
16611 address in NEXT. */
16616 if (insn_defines_reg (regno, INVALID_REGNUM, next))
16618 /* Return -1 if OP0 is set in NEXT. */
16626 if (next == BB_END (bb))
16629 next = NEXT_INSN (next);
16635 /* Return the distance between INSN and the next insn that uses
16636 register number REGNO0 in a memory address. Return -1 if no such
16637 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16640 distance_agu_use (unsigned int regno0, rtx insn)
16642 basic_block bb = BLOCK_FOR_INSN (insn);
16644 bool found = false;
16645 bool redefined = false;
16647 if (insn != BB_END (bb))
16648 distance = distance_agu_use_in_bb (regno0, insn, distance,
16650 &found, &redefined);
16652 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16656 bool simple_loop = false;
16658 FOR_EACH_EDGE (e, ei, bb->succs)
16661 simple_loop = true;
16666 distance = distance_agu_use_in_bb (regno0, insn,
16667 distance, BB_HEAD (bb),
16668 &found, &redefined);
16671 int shortest_dist = -1;
16672 bool found_in_bb = false;
16673 bool redefined_in_bb = false;
16675 FOR_EACH_EDGE (e, ei, bb->succs)
16678 = distance_agu_use_in_bb (regno0, insn,
16679 distance, BB_HEAD (e->dest),
16680 &found_in_bb, &redefined_in_bb);
16683 if (shortest_dist < 0)
16684 shortest_dist = bb_dist;
16685 else if (bb_dist > 0)
16686 shortest_dist = MIN (bb_dist, shortest_dist);
16692 distance = shortest_dist;
16696 if (!found || redefined)
16699 return distance >> 1;
16702 /* Define this macro to tune LEA priority vs ADD; it takes effect when
16703 there is a dilemma of choosing LEA or ADD.
16704 Negative value: ADD is preferred over LEA.
16705 Zero: Neutral.
16706 Positive value: LEA is preferred over ADD. */
16707 #define IX86_LEA_PRIORITY 0
16709 /* Return true if using lea INSN has a performance advantage
16710 over a sequence of instructions. The instruction sequence has
16711 SPLIT_COST cycles higher latency than the lea latency. */
16714 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
16715 unsigned int regno2, unsigned int split_cost)
16717 int dist_define, dist_use;
16719 dist_define = distance_non_agu_define (regno1, regno2, insn);
16720 dist_use = distance_agu_use (regno0, insn);
16722 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
16724 /* If there is no non-AGU operand definition, no AGU
16725 operand usage and the split cost is 0, then both the lea
16726 and non-lea variants have the same priority. Currently
16727 we prefer lea for 64 bit code and non lea on 32 bit
16728 code. */
16729 if (dist_use < 0 && split_cost == 0)
16730 return TARGET_64BIT || IX86_LEA_PRIORITY;
16735 /* With a longer definition distance, lea is preferable.
16736 Here we change it to take into account splitting cost and
16737 lea priority. */
16738 dist_define += split_cost + IX86_LEA_PRIORITY;
16740 /* If there is no use in a memory address then we just check
16741 that split cost does not exceed AGU stall. */
16743 return dist_define >= LEA_MAX_STALL;
16745 /* If this insn has both backward non-agu dependence and forward
16746 agu dependence, the one with the shorter distance takes effect. */
16747 return dist_define >= dist_use;
16750 /* Return true if it is legal to clobber flags by INSN and
16751 false otherwise. */
16754 ix86_ok_to_clobber_flags (rtx insn)
16756 basic_block bb = BLOCK_FOR_INSN (insn);
16762 if (NONDEBUG_INSN_P (insn))
16764 for (use = DF_INSN_USES (insn); *use; use++)
16765 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
16768 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
16772 if (insn == BB_END (bb))
16775 insn = NEXT_INSN (insn);
16778 live = df_get_live_out(bb);
16779 return !REGNO_REG_SET_P (live, FLAGS_REG);
16782 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16783 move and add to avoid AGU stalls. */
16786 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
16788 unsigned int regno0 = true_regnum (operands[0]);
16789 unsigned int regno1 = true_regnum (operands[1]);
16790 unsigned int regno2 = true_regnum (operands[2]);
16792 /* Check if we need to optimize. */
16793 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16796 /* Check it is correct to split here. */
16797 if (!ix86_ok_to_clobber_flags(insn))
16800 /* We need to split only adds with a non-destructive
16801 destination operand. */
16802 if (regno0 == regno1 || regno0 == regno2)
16805 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
16808 /* Return true if we should emit a lea instruction instead of a mov
16809 instruction. */
16812 ix86_use_lea_for_mov (rtx insn, rtx operands[])
16814 unsigned int regno0;
16815 unsigned int regno1;
16817 /* Check if we need to optimize. */
16818 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16821 /* Use lea for reg to reg moves only. */
16822 if (!REG_P (operands[0]) || !REG_P (operands[1]))
16825 regno0 = true_regnum (operands[0]);
16826 regno1 = true_regnum (operands[1]);
16828 return ix86_lea_outperforms (insn, regno0, regno1, -1, 0);
16831 /* Return true if we need to split lea into a sequence of
16832 instructions to avoid AGU stalls. */
16835 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
16837 unsigned int regno0 = true_regnum (operands[0]) ;
16838 unsigned int regno1 = -1;
16839 unsigned int regno2 = -1;
16840 unsigned int split_cost = 0;
16841 struct ix86_address parts;
16844 /* FIXME: Handle zero-extended addresses. */
16845 if (GET_CODE (operands[1]) == ZERO_EXTEND
16846 || GET_CODE (operands[1]) == AND)
16849 /* Check if we need to optimize. */
16850 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16853 /* Check it is correct to split here. */
16854 if (!ix86_ok_to_clobber_flags(insn))
16857 ok = ix86_decompose_address (operands[1], &parts);
16860 /* We should not split into an add if a non-legitimate PIC
16861 operand is used as the displacement. */
16862 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16866 regno1 = true_regnum (parts.base);
16868 regno2 = true_regnum (parts.index);
16870 /* Compute how many cycles we will add to execution time
16871 if we split the lea into a sequence of instructions. */
16872 if (parts.base || parts.index)
16874 /* Have to use a mov instruction if the non-destructive
16875 destination form is used. */
16876 if (regno1 != regno0 && regno2 != regno0)
16879 /* Have to add index to base if both exist. */
16880 if (parts.base && parts.index)
16883 /* Have to use shift and adds if scale is 2 or greater. */
16884 if (parts.scale > 1)
16886 if (regno0 != regno1)
16888 else if (regno2 == regno0)
16891 split_cost += parts.scale;
16894 /* Have to use add instruction with immediate if
16895 disp is non zero. */
16896 if (parts.disp && parts.disp != const0_rtx)
16899 /* Subtract the price of lea. */
16903 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
16906 /* Emit x86 binary operand CODE in mode MODE, where the first operand
16907 matches the destination. The RTX includes a clobber of FLAGS_REG. */
16910 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
16915 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
16916 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16918 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16921 /* Split lea instructions into a sequence of instructions
16922 which are executed on the ALU to avoid AGU stalls.
16923 It is assumed that it is allowed to clobber the flags register
16924 at the lea position. */
16927 ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
16929 unsigned int regno0 = true_regnum (operands[0]) ;
16930 unsigned int regno1 = INVALID_REGNUM;
16931 unsigned int regno2 = INVALID_REGNUM;
16932 struct ix86_address parts;
16936 ok = ix86_decompose_address (operands[1], &parts);
16941 if (GET_MODE (parts.base) != mode)
16942 parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
16943 regno1 = true_regnum (parts.base);
16948 if (GET_MODE (parts.index) != mode)
16949 parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
16950 regno2 = true_regnum (parts.index);
16953 if (parts.scale > 1)
16955 /* Case r1 = r1 + ... */
16956 if (regno1 == regno0)
16958 /* If we have the case r1 = r1 + C * r1 then we
16959 should use multiplication, which is very
16960 expensive. Assume the cost model is wrong if we
16961 have such a case here. */
16962 gcc_assert (regno2 != regno0);
16964 for (adds = parts.scale; adds > 0; adds--)
16965 ix86_emit_binop (PLUS, mode, operands[0], parts.index);
16969 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
16970 if (regno0 != regno2)
16971 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
16973 /* Use shift for scaling. */
16974 ix86_emit_binop (ASHIFT, mode, operands[0],
16975 GEN_INT (exact_log2 (parts.scale)));
16978 ix86_emit_binop (PLUS, mode, operands[0], parts.base);
16980 if (parts.disp && parts.disp != const0_rtx)
16981 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
16984 else if (!parts.base && !parts.index)
16986 gcc_assert(parts.disp);
16987 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
16993 if (regno0 != regno2)
16994 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
16996 else if (!parts.index)
16998 if (regno0 != regno1)
16999 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
17003 if (regno0 == regno1)
17005 else if (regno0 == regno2)
17009 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
17013 ix86_emit_binop (PLUS, mode, operands[0], tmp);
17016 if (parts.disp && parts.disp != const0_rtx)
17017 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
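/* Worked example (illustrative): splitting

	leal	8(%edx,%ecx,4), %eax

   where the destination matches neither the base nor the index
   proceeds as

	movl	%ecx, %eax		# move the index into the dest
	sall	$2, %eax		# shift for the scale of 4
	addl	%edx, %eax		# add the base
	addl	$8, %eax		# add the displacement

   matching the mov/ASHIFT/PLUS sequence built above.  */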
17021 /* Return true if it is ok to optimize an ADD operation to a LEA
17022 operation to avoid flag register consumption. For most processors,
17023 ADD is faster than LEA. For processors like ATOM, if the
17024 destination register of the LEA holds an actual address which will be
17025 used soon, LEA is better; otherwise ADD is better. */
17028 ix86_lea_for_add_ok (rtx insn, rtx operands[])
17030 unsigned int regno0 = true_regnum (operands[0]);
17031 unsigned int regno1 = true_regnum (operands[1]);
17032 unsigned int regno2 = true_regnum (operands[2]);
17034 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17035 if (regno0 != regno1 && regno0 != regno2)
17038 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17041 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
17044 /* Return true if destination reg of SET_BODY is shift count of
17045 USE_BODY. */
17048 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
17054 /* Retrieve destination of SET_BODY. */
17055 switch (GET_CODE (set_body))
17058 set_dest = SET_DEST (set_body);
17059 if (!set_dest || !REG_P (set_dest))
17063 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
17064 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
17072 /* Retrieve shift count of USE_BODY. */
17073 switch (GET_CODE (use_body))
17076 shift_rtx = XEXP (use_body, 1);
17079 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
17080 if (ix86_dep_by_shift_count_body (set_body,
17081 XVECEXP (use_body, 0, i)))
17089 && (GET_CODE (shift_rtx) == ASHIFT
17090 || GET_CODE (shift_rtx) == LSHIFTRT
17091 || GET_CODE (shift_rtx) == ASHIFTRT
17092 || GET_CODE (shift_rtx) == ROTATE
17093 || GET_CODE (shift_rtx) == ROTATERT))
17095 rtx shift_count = XEXP (shift_rtx, 1);
17097 /* Return true if shift count is dest of SET_BODY. */
17098 if (REG_P (shift_count)
17099 && true_regnum (set_dest) == true_regnum (shift_count))
17106 /* Return true if destination reg of SET_INSN is shift count of
17107 USE_INSN. */
17110 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
17112 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
17113 PATTERN (use_insn));
17116 /* Return TRUE or FALSE depending on whether the unary operator meets the
17117 appropriate constraints. */
17120 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
17121 enum machine_mode mode ATTRIBUTE_UNUSED,
17122 rtx operands[2] ATTRIBUTE_UNUSED)
17124 /* If one of operands is memory, source and destination must match. */
17125 if ((MEM_P (operands[0])
17126 || MEM_P (operands[1]))
17127 && ! rtx_equal_p (operands[0], operands[1]))
17132 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17133 are ok, keeping in mind the possible movddup alternative. */
17136 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
17138 if (MEM_P (operands[0]))
17139 return rtx_equal_p (operands[0], operands[1 + high]);
17140 if (MEM_P (operands[1]) && MEM_P (operands[2]))
17141 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
17145 /* Post-reload splitter for converting an SF or DFmode value in an
17146 SSE register into an unsigned SImode. */
17149 ix86_split_convert_uns_si_sse (rtx operands[])
17151 enum machine_mode vecmode;
17152 rtx value, large, zero_or_two31, input, two31, x;
17154 large = operands[1];
17155 zero_or_two31 = operands[2];
17156 input = operands[3];
17157 two31 = operands[4];
17158 vecmode = GET_MODE (large);
17159 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
17161 /* Load up the value into the low element. We must ensure that the other
17162 elements are valid floats -- zero is the easiest such value. */
17165 if (vecmode == V4SFmode)
17166 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
17168 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
17172 input = gen_rtx_REG (vecmode, REGNO (input));
17173 emit_move_insn (value, CONST0_RTX (vecmode));
17174 if (vecmode == V4SFmode)
17175 emit_insn (gen_sse_movss (value, value, input));
17177 emit_insn (gen_sse2_movsd (value, value, input));
17180 emit_move_insn (large, two31);
17181 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
17183 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
17184 emit_insn (gen_rtx_SET (VOIDmode, large, x));
17186 x = gen_rtx_AND (vecmode, zero_or_two31, large);
17187 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
17189 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
17190 emit_insn (gen_rtx_SET (VOIDmode, value, x));
17192 large = gen_rtx_REG (V4SImode, REGNO (large));
17193 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
17195 x = gen_rtx_REG (V4SImode, REGNO (value));
17196 if (vecmode == V4SFmode)
17197 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
17199 emit_insn (gen_sse2_cvttpd2dq (x, value));
17202 emit_insn (gen_xorv4si3 (value, value, large));
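/* Scalar analogue of the branch-free trick above (a sketch, not part
   of GCC): values >= 2**31 have 2**31 subtracted before the signed
   conversion, and the sign bit is xor-ed back in afterwards:

     unsigned int df_to_u32 (double x)
     {
       if (x < 2147483648.0)                     // fits signed range
         return (unsigned int) (int) x;
       return (unsigned int) (int) (x - 2147483648.0) ^ 0x80000000u;
     }

   The vector code computes both paths at once: the LE mask selects 0
   or 2**31 for the subtraction, and the same mask shifted left by 31
   supplies the final xor.  */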
17205 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17206 Expects the 64-bit DImode to be supplied in a pair of integral
17207 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17208 -mfpmath=sse, !optimize_size only. */
17211 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
17213 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
17214 rtx int_xmm, fp_xmm;
17215 rtx biases, exponents;
17218 int_xmm = gen_reg_rtx (V4SImode);
17219 if (TARGET_INTER_UNIT_MOVES)
17220 emit_insn (gen_movdi_to_sse (int_xmm, input));
17221 else if (TARGET_SSE_SPLIT_REGS)
17223 emit_clobber (int_xmm);
17224 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
17228 x = gen_reg_rtx (V2DImode);
17229 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
17230 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
17233 x = gen_rtx_CONST_VECTOR (V4SImode,
17234 gen_rtvec (4, GEN_INT (0x43300000UL),
17235 GEN_INT (0x45300000UL),
17236 const0_rtx, const0_rtx));
17237 exponents = validize_mem (force_const_mem (V4SImode, x));
17239 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17240 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
17242 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17243 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17244 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17245 (0x1.0p84 + double(fp_value_hi_xmm)).
17246 Note these exponents differ by 32. */
17248 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
17250 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17251 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17252 real_ldexp (&bias_lo_rvt, &dconst1, 52);
17253 real_ldexp (&bias_hi_rvt, &dconst1, 84);
17254 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
17255 x = const_double_from_real_value (bias_hi_rvt, DFmode);
17256 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
17257 biases = validize_mem (force_const_mem (V2DFmode, biases));
17258 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
17260 /* Add the upper and lower DFmode values together. */
17262 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
17265 x = copy_to_mode_reg (V2DFmode, fp_xmm);
17266 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
17267 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
17270 ix86_expand_vector_extract (false, target, fp_xmm, 0);
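/* Self-contained scalar model of the same bias trick (illustrative,
   not part of GCC); 0x433 and 0x453 are the biased exponents of 2**52
   and 2**84:

     double u64_to_double (unsigned long long v)
     {
       union { unsigned long long i; double d; } lo, hi;
       lo.i = 0x4330000000000000ULL | (v & 0xffffffffULL); // 2**52 + lo32
       hi.i = 0x4530000000000000ULL | (v >> 32);  // 2**84 + hi32 * 2**32
       return (hi.d - 0x1.0p84) + (lo.d - 0x1.0p52);
     }

   Both subtractions are exact; the only rounding happens in the final
   addition, mirroring the haddpd/addpd step above.  */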
17273 /* Not used, but eases macroization of patterns. */
17275 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
17276 rtx input ATTRIBUTE_UNUSED)
17278 gcc_unreachable ();
17281 /* Convert an unsigned SImode value into a DFmode. Only currently used
17282 for SSE, but applicable anywhere. */
17285 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
17287 REAL_VALUE_TYPE TWO31r;
17290 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
17291 NULL, 1, OPTAB_DIRECT);
17293 fp = gen_reg_rtx (DFmode);
17294 emit_insn (gen_floatsidf2 (fp, x));
17296 real_ldexp (&TWO31r, &dconst1, 31);
17297 x = const_double_from_real_value (TWO31r, DFmode);
17299 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
17301 emit_move_insn (target, x);
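/* Scalar model of the transformation above (illustrative; it relies on
   GCC's modulo semantics for the unsigned-to-int conversion): biasing
   by -2**31 brings the value into signed SImode range, and adding
   2**31 back as a double is exact:

     double u32_to_double (unsigned int u)
     {
       return (double) (int) (u - 0x80000000u) + 2147483648.0;
     }
*/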
17304 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17305 32-bit mode; otherwise we have a direct convert instruction. */
17308 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
17310 REAL_VALUE_TYPE TWO32r;
17311 rtx fp_lo, fp_hi, x;
17313 fp_lo = gen_reg_rtx (DFmode);
17314 fp_hi = gen_reg_rtx (DFmode);
17316 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
17318 real_ldexp (&TWO32r, &dconst1, 32);
17319 x = const_double_from_real_value (TWO32r, DFmode);
17320 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
17322 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
17324 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
17327 emit_move_insn (target, x);
17330 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17331 For x86_32, -mfpmath=sse, !optimize_size only. */
17333 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
17335 REAL_VALUE_TYPE ONE16r;
17336 rtx fp_hi, fp_lo, int_hi, int_lo, x;
17338 real_ldexp (&ONE16r, &dconst1, 16);
17339 x = const_double_from_real_value (ONE16r, SFmode);
17340 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
17341 NULL, 0, OPTAB_DIRECT);
17342 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
17343 NULL, 0, OPTAB_DIRECT);
17344 fp_hi = gen_reg_rtx (SFmode);
17345 fp_lo = gen_reg_rtx (SFmode);
17346 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
17347 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
17348 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
17350 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
17352 if (!rtx_equal_p (target, fp_hi))
17353 emit_move_insn (target, fp_hi);
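/* SFmode has only 24 significand bits, so converting a full 32-bit
   value in one step could round twice.  Each 16-bit half is exact in
   SFmode, as is fp_hi * 0x1.0p16, leaving the final addition as the
   only rounding step, which therefore produces the correctly rounded
   result.  */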
17356 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17357 a vector of unsigned ints VAL to vector of floats TARGET. */
17360 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
17363 REAL_VALUE_TYPE TWO16r;
17364 enum machine_mode intmode = GET_MODE (val);
17365 enum machine_mode fltmode = GET_MODE (target);
17366 rtx (*cvt) (rtx, rtx);
17368 if (intmode == V4SImode)
17369 cvt = gen_floatv4siv4sf2;
17371 cvt = gen_floatv8siv8sf2;
17372 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
17373 tmp[0] = force_reg (intmode, tmp[0]);
17374 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
17376 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
17377 NULL_RTX, 1, OPTAB_DIRECT);
17378 tmp[3] = gen_reg_rtx (fltmode);
17379 emit_insn (cvt (tmp[3], tmp[1]));
17380 tmp[4] = gen_reg_rtx (fltmode);
17381 emit_insn (cvt (tmp[4], tmp[2]));
17382 real_ldexp (&TWO16r, &dconst1, 16);
17383 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
17384 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
17385 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
17387 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
17389 if (tmp[7] != target)
17390 emit_move_insn (target, tmp[7]);
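/* This is the vector analogue of ix86_expand_convert_uns_sisf_sse
   above: each lane is split into exact 16-bit halves, the high half
   is scaled by 0x1.0p16 (also exact), and the only rounding happens
   in the final per-lane addition.  */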
17393 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17394 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17395 This is done by doing just signed conversion if < 0x1p31, and otherwise by
17396 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
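/* Taking V4SFmode lanes as an example: for a lane holding 0x1.8p31
   (3221225472.0) the comparison below produces an all-ones mask, so
   0x1.0p31 is subtracted, leaving 0x1.0p30, and *XORP gets 0x80000000
   for that lane; the signed fix_trunc then yields 0x40000000, and the
   final xor restores 0xc0000000.  Lanes below 0x1.0p31 get a zero
   mask and pass through unchanged.  */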
17399 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
17401 REAL_VALUE_TYPE TWO31r;
17402 rtx two31r, tmp[4];
17403 enum machine_mode mode = GET_MODE (val);
17404 enum machine_mode scalarmode = GET_MODE_INNER (mode);
17405 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
17406 rtx (*cmp) (rtx, rtx, rtx, rtx);
17409 for (i = 0; i < 3; i++)
17410 tmp[i] = gen_reg_rtx (mode);
17411 real_ldexp (&TWO31r, &dconst1, 31);
17412 two31r = const_double_from_real_value (TWO31r, scalarmode);
17413 two31r = ix86_build_const_vector (mode, 1, two31r);
17414 two31r = force_reg (mode, two31r);
17415 switch (mode)
17416 {
17417 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
17418 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
17419 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
17420 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
17421 default: gcc_unreachable ();
17422 }
17423 tmp[3] = gen_rtx_LE (mode, two31r, val);
17424 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
17425 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
17427 if (intmode == V4SImode || TARGET_AVX2)
17428 *xorp = expand_simple_binop (intmode, ASHIFT,
17429 gen_lowpart (intmode, tmp[0]),
17430 GEN_INT (31), NULL_RTX, 0,
17434 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
17435 two31 = ix86_build_const_vector (intmode, 1, two31);
17436 *xorp = expand_simple_binop (intmode, AND,
17437 gen_lowpart (intmode, tmp[0]),
17438 two31, NULL_RTX, 0,
17441 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
17445 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17446 then replicate the value for all elements of the vector
17447 register. */
17450 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
17454 enum machine_mode scalar_mode;
17471 n_elt = GET_MODE_NUNITS (mode);
17472 v = rtvec_alloc (n_elt);
17473 scalar_mode = GET_MODE_INNER (mode);
17475 RTVEC_ELT (v, 0) = value;
17477 for (i = 1; i < n_elt; ++i)
17478 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
17480 return gen_rtx_CONST_VECTOR (mode, v);
17483 gcc_unreachable ();
17487 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17488 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17489 for an SSE register. If VECT is true, then replicate the mask for
17490 all elements of the vector register. If INVERT is true, then create
17491 a mask excluding the sign bit. */
17494 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
17496 enum machine_mode vec_mode, imode;
17497 HOST_WIDE_INT hi, lo;
17502 /* Find the sign bit, sign extended to 2*HWI. */
17510 mode = GET_MODE_INNER (mode);
17512 lo = 0x80000000, hi = lo < 0;
17520 mode = GET_MODE_INNER (mode);
17522 if (HOST_BITS_PER_WIDE_INT >= 64)
17523 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
17525 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17530 vec_mode = VOIDmode;
17531 if (HOST_BITS_PER_WIDE_INT >= 64)
17534 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
17541 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17545 lo = ~lo, hi = ~hi;
17551 mask = immed_double_const (lo, hi, imode);
17553 vec = gen_rtvec (2, v, mask);
17554 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
17555 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
17562 gcc_unreachable ();
17566 lo = ~lo, hi = ~hi;
17568 /* Force this value into the low part of a fp vector constant. */
17569 mask = immed_double_const (lo, hi, imode);
17570 mask = gen_lowpart (mode, mask);
17572 if (vec_mode == VOIDmode)
17573 return force_reg (mode, mask);
17575 v = ix86_build_const_vector (vec_mode, vect, mask);
17576 return force_reg (vec_mode, v);
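/* E.g. for SFmode the mask is 0x80000000 (just the sign bit), or
   0x7fffffff when INVERT is true; for DFmode it is bit 63.  ABS is
   then an AND with the inverted mask and NEG an XOR with the plain
   mask, as emitted by ix86_expand_fp_absneg_operator below.  */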
17579 /* Generate code for floating point ABS or NEG. */
17582 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
17585 rtx mask, set, dst, src;
17586 bool use_sse = false;
17587 bool vector_mode = VECTOR_MODE_P (mode);
17588 enum machine_mode vmode = mode;
17592 else if (mode == TFmode)
17594 else if (TARGET_SSE_MATH)
17596 use_sse = SSE_FLOAT_MODE_P (mode);
17597 if (mode == SFmode)
17599 else if (mode == DFmode)
17603 /* NEG and ABS performed with SSE use bitwise mask operations.
17604 Create the appropriate mask now. */
17606 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
17613 set = gen_rtx_fmt_e (code, mode, src);
17614 set = gen_rtx_SET (VOIDmode, dst, set);
17621 use = gen_rtx_USE (VOIDmode, mask);
17623 par = gen_rtvec (2, set, use);
17626 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17627 par = gen_rtvec (3, set, use, clob);
17629 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
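/* As a concrete instance, negating the SFmode value 1.0 (0x3f800000)
   xors in 0x80000000 to give -1.0 (0xbf800000), and ABS of -1.0 ands
   with 0x7fffffff to give 1.0.  Unlike arithmetic negation, the
   bitwise form also flips the sign of zeros and NaNs, matching IEEE
   sign-bit semantics.  */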
17635 /* Expand a copysign operation. Special case operand 0 being a constant. */
17638 ix86_expand_copysign (rtx operands[])
17640 enum machine_mode mode, vmode;
17641 rtx dest, op0, op1, mask, nmask;
17643 dest = operands[0];
17647 mode = GET_MODE (dest);
17649 if (mode == SFmode)
17651 else if (mode == DFmode)
17656 if (GET_CODE (op0) == CONST_DOUBLE)
17658 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
17660 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17661 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17663 if (mode == SFmode || mode == DFmode)
17665 if (op0 == CONST0_RTX (mode))
17666 op0 = CONST0_RTX (vmode);
17669 rtx v = ix86_build_const_vector (vmode, false, op0);
17671 op0 = force_reg (vmode, v);
17674 else if (op0 != CONST0_RTX (mode))
17675 op0 = force_reg (mode, op0);
17677 mask = ix86_build_signbit_mask (vmode, 0, 0);
17679 if (mode == SFmode)
17680 copysign_insn = gen_copysignsf3_const;
17681 else if (mode == DFmode)
17682 copysign_insn = gen_copysigndf3_const;
17684 copysign_insn = gen_copysigntf3_const;
17686 emit_insn (copysign_insn (dest, op0, op1, mask));
17690 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17692 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17693 mask = ix86_build_signbit_mask (vmode, 0, 0);
17695 if (mode == SFmode)
17696 copysign_insn = gen_copysignsf3_var;
17697 else if (mode == DFmode)
17698 copysign_insn = gen_copysigndf3_var;
17700 copysign_insn = gen_copysigntf3_var;
17702 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17706 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17707 be a constant, and so has already been expanded into a vector constant. */
17710 ix86_split_copysign_const (rtx operands[])
17712 enum machine_mode mode, vmode;
17713 rtx dest, op0, mask, x;
17715 dest = operands[0];
17717 mask = operands[3];
17719 mode = GET_MODE (dest);
17720 vmode = GET_MODE (mask);
17722 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17723 x = gen_rtx_AND (vmode, dest, mask);
17724 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17726 if (op0 != CONST0_RTX (vmode))
17728 x = gen_rtx_IOR (vmode, dest, op0);
17729 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
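/* The identity implemented here is
     copysign (x, y) = abs (x) | (y & signmask),
   where abs (x) was folded into a vector constant by
   ix86_expand_copysign; since the insn pattern ties the sign source
   to the destination register, the AND above extracts just the sign
   bit of Y and the IOR merges in the constant magnitude.  */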
17733 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17734 so we have to do two masks. */
17737 ix86_split_copysign_var (rtx operands[])
17739 enum machine_mode mode, vmode;
17740 rtx dest, scratch, op0, op1, mask, nmask, x;
17742 dest = operands[0];
17743 scratch = operands[1];
17746 nmask = operands[4];
17747 mask = operands[5];
17749 mode = GET_MODE (dest);
17750 vmode = GET_MODE (mask);
17752 if (rtx_equal_p (op0, op1))
17754 /* Shouldn't happen often (it's useless, obviously), but when it does
17755 we'd generate incorrect code if we continue below. */
17756 emit_move_insn (dest, op0);
17760 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17762 gcc_assert (REGNO (op1) == REGNO (scratch));
17764 x = gen_rtx_AND (vmode, scratch, mask);
17765 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17768 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17769 x = gen_rtx_NOT (vmode, dest);
17770 x = gen_rtx_AND (vmode, x, op0);
17771 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17775 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17777 x = gen_rtx_AND (vmode, scratch, mask);
17779 else /* alternative 2,4 */
17781 gcc_assert (REGNO (mask) == REGNO (scratch));
17782 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17783 x = gen_rtx_AND (vmode, scratch, op1);
17785 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17787 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17789 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17790 x = gen_rtx_AND (vmode, dest, nmask);
17792 else /* alternative 3,4 */
17794 gcc_assert (REGNO (nmask) == REGNO (dest));
17796 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17797 x = gen_rtx_AND (vmode, dest, op0);
17799 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17802 x = gen_rtx_IOR (vmode, dest, scratch);
17803 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17806 /* Return TRUE or FALSE depending on whether the first SET in INSN
17807 has source and destination with matching CC modes and whether the
17808 CC mode is at least as constrained as REQ_MODE. */
17811 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17814 enum machine_mode set_mode;
17816 set = PATTERN (insn);
17817 if (GET_CODE (set) == PARALLEL)
17818 set = XVECEXP (set, 0, 0);
17819 gcc_assert (GET_CODE (set) == SET);
17820 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17822 set_mode = GET_MODE (SET_DEST (set));
17826 if (req_mode != CCNOmode
17827 && (req_mode != CCmode
17828 || XEXP (SET_SRC (set), 1) != const0_rtx))
17832 if (req_mode == CCGCmode)
17836 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17840 if (req_mode == CCZmode)
17850 if (set_mode != req_mode)
17855 gcc_unreachable ();
17858 return GET_MODE (SET_SRC (set)) == set_mode;
17861 /* Generate insn patterns to do an integer compare of OPERANDS. */
17864 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17866 enum machine_mode cmpmode;
17869 cmpmode = SELECT_CC_MODE (code, op0, op1);
17870 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17872 /* This is very simple, but making the interface the same as in the
17873 FP case makes the rest of the code easier. */
17874 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17875 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17877 /* Return the test that should be put into the flags user, i.e.
17878 the bcc, scc, or cmov instruction. */
17879 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17882 /* Figure out whether to use ordered or unordered fp comparisons.
17883 Return the appropriate mode to use. */
17886 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17888 /* ??? In order to make all comparisons reversible, we do all comparisons
17889 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17890 all forms of trapping and nontrapping comparisons, we can make inequality
17891 comparisons trapping again, since it results in better code when using
17892 FCOM based compares. */
17893 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17897 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17899 enum machine_mode mode = GET_MODE (op0);
17901 if (SCALAR_FLOAT_MODE_P (mode))
17903 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17904 return ix86_fp_compare_mode (code);
17909 /* Only zero flag is needed. */
17910 case EQ: /* ZF=0 */
17911 case NE: /* ZF!=0 */
17913 /* Codes needing carry flag. */
17914 case GEU: /* CF=0 */
17915 case LTU: /* CF=1 */
17916 /* Detect overflow checks. They need just the carry flag. */
17917 if (GET_CODE (op0) == PLUS
17918 && rtx_equal_p (op1, XEXP (op0, 0)))
17922 case GTU: /* CF=0 & ZF=0 */
17923 case LEU: /* CF=1 | ZF=1 */
17924 /* Detect overflow checks. They need just the carry flag. */
17925 if (GET_CODE (op0) == MINUS
17926 && rtx_equal_p (op1, XEXP (op0, 0)))
17930 /* Codes possibly doable only with sign flag when
17931 comparing against zero. */
17932 case GE: /* SF=OF or SF=0 */
17933 case LT: /* SF<>OF or SF=1 */
17934 if (op1 == const0_rtx)
17937 /* For other cases the carry flag is not required. */
17939 /* Codes doable only with the sign flag when comparing
17940 against zero, but for which we miss a jump instruction,
17941 so we need to use relational tests against overflow,
17942 which thus needs to be zero. */
17943 case GT: /* ZF=0 & SF=OF */
17944 case LE: /* ZF=1 | SF<>OF */
17945 if (op1 == const0_rtx)
17949 /* The strcmp pattern does (use flags), and combine may ask us for
17950 the proper mode. */
17954 gcc_unreachable ();
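/* E.g. the PLUS check above recognizes the compare emitted for code
   like "if (a + b < a)": op0 is (plus a b) and op1 is rtx-equal to
   the first addend, so the test reduces to the carry out of the
   addition and a carry-only CC mode suffices.  */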
17958 /* Return the fixed registers used for condition codes. */
17961 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17968 /* If two condition code modes are compatible, return a condition code
17969 mode which is compatible with both. Otherwise, return
17970 VOIDmode. */
17972 static enum machine_mode
17973 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17978 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17981 if ((m1 == CCGCmode && m2 == CCGOCmode)
17982 || (m1 == CCGOCmode && m2 == CCGCmode))
17988 gcc_unreachable ();
18018 /* These are only compatible with themselves, which we already
18019 know. */
18025 /* Return a comparison we can do that is equivalent to
18026 swap_condition (code), apart possibly from orderedness.
18027 But, never change orderedness if TARGET_IEEE_FP, returning
18028 UNKNOWN in that case if necessary. */
18030 static enum rtx_code
18031 ix86_fp_swap_condition (enum rtx_code code)
18035 case GT: /* GTU - CF=0 & ZF=0 */
18036 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
18037 case GE: /* GEU - CF=0 */
18038 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
18039 case UNLT: /* LTU - CF=1 */
18040 return TARGET_IEEE_FP ? UNKNOWN : GT;
18041 case UNLE: /* LEU - CF=1 | ZF=1 */
18042 return TARGET_IEEE_FP ? UNKNOWN : GE;
18044 return swap_condition (code);
18048 /* Return cost of comparison CODE using the best strategy for performance.
18049 All following functions use the number of instructions as the cost metric.
18050 In the future this should be tweaked to compute bytes for optimize_size and
18051 take into account performance of various instructions on various CPUs. */
18054 ix86_fp_comparison_cost (enum rtx_code code)
18058 /* The cost of code using bit-twiddling on %ah. */
18075 arith_cost = TARGET_IEEE_FP ? 5 : 4;
18079 arith_cost = TARGET_IEEE_FP ? 6 : 4;
18082 gcc_unreachable ();
18085 switch (ix86_fp_comparison_strategy (code))
18087 case IX86_FPCMP_COMI:
18088 return arith_cost > 4 ? 3 : 2;
18089 case IX86_FPCMP_SAHF:
18090 return arith_cost > 4 ? 4 : 3;
18096 /* Return strategy to use for floating-point. We assume that fcomi is always
18097 preferable where available, since that is also true when looking at size
18098 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18100 enum ix86_fpcmp_strategy
18101 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
18103 /* Do fcomi/sahf based test when profitable. */
18106 return IX86_FPCMP_COMI;
18108 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
18109 return IX86_FPCMP_SAHF;
18111 return IX86_FPCMP_ARITH;
18114 /* Swap, force into registers, or otherwise massage the two operands
18115 to a fp comparison. The operands are updated in place; the new
18116 comparison code is returned. */
18118 static enum rtx_code
18119 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
18121 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
18122 rtx op0 = *pop0, op1 = *pop1;
18123 enum machine_mode op_mode = GET_MODE (op0);
18124 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
18126 /* All of the unordered compare instructions only work on registers.
18127 The same is true of the fcomi compare instructions. The XFmode
18128 compare instructions require registers except when comparing
18129 against zero or when converting operand 1 from fixed point to
18130 floating point. */
18133 && (fpcmp_mode == CCFPUmode
18134 || (op_mode == XFmode
18135 && ! (standard_80387_constant_p (op0) == 1
18136 || standard_80387_constant_p (op1) == 1)
18137 && GET_CODE (op1) != FLOAT)
18138 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
18140 op0 = force_reg (op_mode, op0);
18141 op1 = force_reg (op_mode, op1);
18145 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18146 things around if they appear profitable, otherwise force op0
18147 into a register. */
18149 if (standard_80387_constant_p (op0) == 0
18151 && ! (standard_80387_constant_p (op1) == 0
18154 enum rtx_code new_code = ix86_fp_swap_condition (code);
18155 if (new_code != UNKNOWN)
18158 tmp = op0, op0 = op1, op1 = tmp;
18164 op0 = force_reg (op_mode, op0);
18166 if (CONSTANT_P (op1))
18168 int tmp = standard_80387_constant_p (op1);
18170 op1 = validize_mem (force_const_mem (op_mode, op1));
18174 op1 = force_reg (op_mode, op1);
18177 op1 = force_reg (op_mode, op1);
18181 /* Try to rearrange the comparison to make it cheaper. */
18182 if (ix86_fp_comparison_cost (code)
18183 > ix86_fp_comparison_cost (swap_condition (code))
18184 && (REG_P (op1) || can_create_pseudo_p ()))
18187 tmp = op0, op0 = op1, op1 = tmp;
18188 code = swap_condition (code);
18190 op0 = force_reg (op_mode, op0);
18198 /* Convert comparison codes we use to represent FP comparison to integer
18199 code that will result in proper branch. Return UNKNOWN if no such
18200 code is available. */
18203 ix86_fp_compare_code_to_integer (enum rtx_code code)
18232 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18235 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
18237 enum machine_mode fpcmp_mode, intcmp_mode;
18240 fpcmp_mode = ix86_fp_compare_mode (code);
18241 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
18243 /* Do fcomi/sahf based test when profitable. */
18244 switch (ix86_fp_comparison_strategy (code))
18246 case IX86_FPCMP_COMI:
18247 intcmp_mode = fpcmp_mode;
18248 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18249 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18254 case IX86_FPCMP_SAHF:
18255 intcmp_mode = fpcmp_mode;
18256 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18257 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18261 scratch = gen_reg_rtx (HImode);
18262 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
18263 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
18266 case IX86_FPCMP_ARITH:
18267 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18268 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18269 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
18271 scratch = gen_reg_rtx (HImode);
18272 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
18274 /* In the unordered case, we have to check C2 for NaN's, which
18275 doesn't happen to work out to anything nice combination-wise.
18276 So do some bit twiddling on the value we've got in AH to come
18277 up with an appropriate set of condition codes. */
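/* After fnstsw, AH holds the x87 condition bits: C0 is 0x01, C2 is
   0x04 and C3 is 0x40, so 0x45 selects all three.  fcom sets none of
   them for op0 > op1, C0 alone for op0 < op1, C3 alone for equality
   and all three for an unordered result, which is what the masks
   0x45, 0x44, 0x40, 0x05, 0x04 and 0x01 below are testing for.  */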
18279 intcmp_mode = CCNOmode;
18284 if (code == GT || !TARGET_IEEE_FP)
18286 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
18291 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18292 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18293 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
18294 intcmp_mode = CCmode;
18300 if (code == LT && TARGET_IEEE_FP)
18302 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18303 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
18304 intcmp_mode = CCmode;
18309 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
18315 if (code == GE || !TARGET_IEEE_FP)
18317 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
18322 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18323 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
18329 if (code == LE && TARGET_IEEE_FP)
18331 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18332 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18333 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18334 intcmp_mode = CCmode;
18339 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
18345 if (code == EQ && TARGET_IEEE_FP)
18347 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18348 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18349 intcmp_mode = CCmode;
18354 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
18360 if (code == NE && TARGET_IEEE_FP)
18362 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18363 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
18369 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
18375 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
18379 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
18384 gcc_unreachable ();
18392 /* Return the test that should be put into the flags user, i.e.
18393 the bcc, scc, or cmov instruction. */
18394 return gen_rtx_fmt_ee (code, VOIDmode,
18395 gen_rtx_REG (intcmp_mode, FLAGS_REG),
18400 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
18404 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
18405 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
18407 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
18409 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
18410 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18413 ret = ix86_expand_int_compare (code, op0, op1);
18419 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
18421 enum machine_mode mode = GET_MODE (op0);
18433 tmp = ix86_expand_compare (code, op0, op1);
18434 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18435 gen_rtx_LABEL_REF (VOIDmode, label),
18437 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18444 /* Expand DImode branch into multiple compare+branch. */
18446 rtx lo[2], hi[2], label2;
18447 enum rtx_code code1, code2, code3;
18448 enum machine_mode submode;
18450 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
18452 tmp = op0, op0 = op1, op1 = tmp;
18453 code = swap_condition (code);
18456 split_double_mode (mode, &op0, 1, lo+0, hi+0);
18457 split_double_mode (mode, &op1, 1, lo+1, hi+1);
18459 submode = mode == DImode ? SImode : DImode;
18461 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18462 avoid two branches. This costs one extra insn, so disable when
18463 optimizing for size. */
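/* E.g. for "a == b" this computes (hi(a) ^ hi(b)) | (lo(a) ^ lo(b))
   and branches on the result compared against zero, which is zero
   iff both halves match.  */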
18465 if ((code == EQ || code == NE)
18466 && (!optimize_insn_for_size_p ()
18467 || hi[1] == const0_rtx || lo[1] == const0_rtx))
18472 if (hi[1] != const0_rtx)
18473 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
18474 NULL_RTX, 0, OPTAB_WIDEN);
18477 if (lo[1] != const0_rtx)
18478 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
18479 NULL_RTX, 0, OPTAB_WIDEN);
18481 tmp = expand_binop (submode, ior_optab, xor1, xor0,
18482 NULL_RTX, 0, OPTAB_WIDEN);
18484 ix86_expand_branch (code, tmp, const0_rtx, label);
18488 /* Otherwise, if we are doing less-than or greater-or-equal-than,
18489 op1 is a constant and the low word is zero, then we can just
18490 examine the high word. Similarly for low word -1 and
18491 less-or-equal-than or greater-than. */
18493 if (CONST_INT_P (hi[1]))
18496 case LT: case LTU: case GE: case GEU:
18497 if (lo[1] == const0_rtx)
18499 ix86_expand_branch (code, hi[0], hi[1], label);
18503 case LE: case LEU: case GT: case GTU:
18504 if (lo[1] == constm1_rtx)
18506 ix86_expand_branch (code, hi[0], hi[1], label);
18514 /* Otherwise, we need two or three jumps. */
18516 label2 = gen_label_rtx ();
18519 code2 = swap_condition (code);
18520 code3 = unsigned_condition (code);
18524 case LT: case GT: case LTU: case GTU:
18527 case LE: code1 = LT; code2 = GT; break;
18528 case GE: code1 = GT; code2 = LT; break;
18529 case LEU: code1 = LTU; code2 = GTU; break;
18530 case GEU: code1 = GTU; code2 = LTU; break;
18532 case EQ: code1 = UNKNOWN; code2 = NE; break;
18533 case NE: code2 = UNKNOWN; break;
18536 gcc_unreachable ();
18539 /*
18540 * a < b =>
18541 * if (hi(a) < hi(b)) goto true;
18542 * if (hi(a) > hi(b)) goto false;
18543 * if (lo(a) < lo(b)) goto true;
18544 * false:
18545 */
18547 if (code1 != UNKNOWN)
18548 ix86_expand_branch (code1, hi[0], hi[1], label);
18549 if (code2 != UNKNOWN)
18550 ix86_expand_branch (code2, hi[0], hi[1], label2);
18552 ix86_expand_branch (code3, lo[0], lo[1], label);
18554 if (code2 != UNKNOWN)
18555 emit_label (label2);
18560 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
18565 /* Split branch based on floating point condition. */
18567 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18568 rtx target1, rtx target2, rtx tmp, rtx pushed)
18573 if (target2 != pc_rtx)
18576 code = reverse_condition_maybe_unordered (code);
18581 condition = ix86_expand_fp_compare (code, op1, op2,
18584 /* Remove pushed operand from stack. */
18586 ix86_free_from_memory (GET_MODE (pushed));
18588 i = emit_jump_insn (gen_rtx_SET
18590 gen_rtx_IF_THEN_ELSE (VOIDmode,
18591 condition, target1, target2)));
18592 if (split_branch_probability >= 0)
18593 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
18597 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18601 gcc_assert (GET_MODE (dest) == QImode);
18603 ret = ix86_expand_compare (code, op0, op1);
18604 PUT_MODE (ret, QImode);
18605 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
18608 /* Expand comparison setting or clearing carry flag. Return true when
18609 successful and set pop for the operation. */
18611 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18613 enum machine_mode mode =
18614 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18616 /* Do not handle double-mode compares that go through special path. */
18617 if (mode == (TARGET_64BIT ? TImode : DImode))
18620 if (SCALAR_FLOAT_MODE_P (mode))
18622 rtx compare_op, compare_seq;
18624 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18626 /* Shortcut: the following common codes never translate
18627 into carry flag compares. */
18628 if (code == EQ || code == NE || code == UNEQ || code == LTGT
18629 || code == ORDERED || code == UNORDERED)
18632 /* These comparisons require zero flag; swap operands so they won't. */
18633 if ((code == GT || code == UNLE || code == LE || code == UNGT)
18634 && !TARGET_IEEE_FP)
18639 code = swap_condition (code);
18642 /* Try to expand the comparison and verify that we end up with
18643 carry flag based comparison. This fails to be true only when
18644 we decide to expand the comparison using arithmetic, which is not
18645 a common scenario. */
18647 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18648 compare_seq = get_insns ();
18651 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18652 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18653 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18655 code = GET_CODE (compare_op);
18657 if (code != LTU && code != GEU)
18660 emit_insn (compare_seq);
18665 if (!INTEGRAL_MODE_P (mode))
18674 /* Convert a==0 into (unsigned)a<1. */
18677 if (op1 != const0_rtx)
18680 code = (code == EQ ? LTU : GEU);
18683 /* Convert a>b into b<a or a>=b-1. */
18686 if (CONST_INT_P (op1))
18688 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18689 /* Bail out on overflow. We still can swap operands but that
18690 would force loading of the constant into a register. */
18691 if (op1 == const0_rtx
18692 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18694 code = (code == GTU ? GEU : LTU);
18701 code = (code == GTU ? LTU : GEU);
18705 /* Convert a>=0 into (unsigned)a<0x80000000. */
18708 if (mode == DImode || op1 != const0_rtx)
18710 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18711 code = (code == LT ? GEU : LTU);
18715 if (mode == DImode || op1 != constm1_rtx)
18717 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18718 code = (code == LE ? GEU : LTU);
18724 /* Swapping operands may cause a constant to appear as the first operand. */
18725 if (!nonimmediate_operand (op0, VOIDmode))
18727 if (!can_create_pseudo_p ())
18729 op0 = force_reg (mode, op0);
18731 *pop = ix86_expand_compare (code, op0, op1);
18732 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
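/* E.g. "a == 0" has been rewritten above as the carry-producing
   compare "(unsigned) a < 1", so callers such as
   ix86_expand_int_movcc can materialize 0/-1 straight from the carry
   flag with sbb (see the gen_x86_movsicc_0_m1 use there).  */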
18737 ix86_expand_int_movcc (rtx operands[])
18739 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18740 rtx compare_seq, compare_op;
18741 enum machine_mode mode = GET_MODE (operands[0]);
18742 bool sign_bit_compare_p = false;
18743 rtx op0 = XEXP (operands[1], 0);
18744 rtx op1 = XEXP (operands[1], 1);
18747 compare_op = ix86_expand_compare (code, op0, op1);
18748 compare_seq = get_insns ();
18751 compare_code = GET_CODE (compare_op);
18753 if ((op1 == const0_rtx && (code == GE || code == LT))
18754 || (op1 == constm1_rtx && (code == GT || code == LE)))
18755 sign_bit_compare_p = true;
18757 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18758 HImode insns, we'd be swallowed in word prefix ops. */
18760 if ((mode != HImode || TARGET_FAST_PREFIX)
18761 && (mode != (TARGET_64BIT ? TImode : DImode))
18762 && CONST_INT_P (operands[2])
18763 && CONST_INT_P (operands[3]))
18765 rtx out = operands[0];
18766 HOST_WIDE_INT ct = INTVAL (operands[2]);
18767 HOST_WIDE_INT cf = INTVAL (operands[3]);
18768 HOST_WIDE_INT diff;
18771 /* Sign bit compares are better done using shifts than we do by using
18772 sbb. */
18773 if (sign_bit_compare_p
18774 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18776 /* Detect overlap between destination and compare sources. */
18779 if (!sign_bit_compare_p)
18782 bool fpcmp = false;
18784 compare_code = GET_CODE (compare_op);
18786 flags = XEXP (compare_op, 0);
18788 if (GET_MODE (flags) == CCFPmode
18789 || GET_MODE (flags) == CCFPUmode)
18793 = ix86_fp_compare_code_to_integer (compare_code);
18796 /* To simplify the rest of the code, restrict to the GEU case. */
18797 if (compare_code == LTU)
18799 HOST_WIDE_INT tmp = ct;
18802 compare_code = reverse_condition (compare_code);
18803 code = reverse_condition (code);
18808 PUT_CODE (compare_op,
18809 reverse_condition_maybe_unordered
18810 (GET_CODE (compare_op)));
18812 PUT_CODE (compare_op,
18813 reverse_condition (GET_CODE (compare_op)));
18817 if (reg_overlap_mentioned_p (out, op0)
18818 || reg_overlap_mentioned_p (out, op1))
18819 tmp = gen_reg_rtx (mode);
18821 if (mode == DImode)
18822 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18824 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18825 flags, compare_op));
18829 if (code == GT || code == GE)
18830 code = reverse_condition (code);
18833 HOST_WIDE_INT tmp = ct;
18838 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18851 tmp = expand_simple_binop (mode, PLUS,
18853 copy_rtx (tmp), 1, OPTAB_DIRECT);
18864 tmp = expand_simple_binop (mode, IOR,
18866 copy_rtx (tmp), 1, OPTAB_DIRECT);
18868 else if (diff == -1 && ct)
18878 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18880 tmp = expand_simple_binop (mode, PLUS,
18881 copy_rtx (tmp), GEN_INT (cf),
18882 copy_rtx (tmp), 1, OPTAB_DIRECT);
18890 * andl cf - ct, dest
18900 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18903 tmp = expand_simple_binop (mode, AND,
18905 gen_int_mode (cf - ct, mode),
18906 copy_rtx (tmp), 1, OPTAB_DIRECT);
18908 tmp = expand_simple_binop (mode, PLUS,
18909 copy_rtx (tmp), GEN_INT (ct),
18910 copy_rtx (tmp), 1, OPTAB_DIRECT);
18913 if (!rtx_equal_p (tmp, out))
18914 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
18921 enum machine_mode cmp_mode = GET_MODE (op0);
18924 tmp = ct, ct = cf, cf = tmp;
18927 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18929 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18931 /* We may be reversing an unordered compare to a normal compare, which
18932 is not valid in general (we may convert a non-trapping condition
18933 to a trapping one), however on i386 we currently emit all
18934 comparisons unordered. */
18935 compare_code = reverse_condition_maybe_unordered (compare_code);
18936 code = reverse_condition_maybe_unordered (code);
18940 compare_code = reverse_condition (compare_code);
18941 code = reverse_condition (code);
18945 compare_code = UNKNOWN;
18946 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18947 && CONST_INT_P (op1))
18949 if (op1 == const0_rtx
18950 && (code == LT || code == GE))
18951 compare_code = code;
18952 else if (op1 == constm1_rtx)
18956 else if (code == GT)
18961 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18962 if (compare_code != UNKNOWN
18963 && GET_MODE (op0) == GET_MODE (out)
18964 && (cf == -1 || ct == -1))
18966 /* If lea code below could be used, only optimize
18967 if it results in a 2 insn sequence. */
18969 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18970 || diff == 3 || diff == 5 || diff == 9)
18971 || (compare_code == LT && ct == -1)
18972 || (compare_code == GE && cf == -1))
18975 * notl op1 (if necessary)
18983 code = reverse_condition (code);
18986 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18988 out = expand_simple_binop (mode, IOR,
18990 out, 1, OPTAB_DIRECT);
18991 if (out != operands[0])
18992 emit_move_insn (operands[0], out);
18999 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
19000 || diff == 3 || diff == 5 || diff == 9)
19001 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
19003 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
19009 * lea cf(dest*(ct-cf)),dest
19013 * This also catches the degenerate setcc-only case.
19019 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
19022 /* On x86_64 the lea instruction operates on Pmode, so we need
19023 to do the arithmetic in the proper mode to match. */
19025 tmp = copy_rtx (out);
19029 out1 = copy_rtx (out);
19030 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
19034 tmp = gen_rtx_PLUS (mode, tmp, out1);
19040 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
19043 if (!rtx_equal_p (tmp, out))
19046 out = force_operand (tmp, copy_rtx (out));
19048 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
19050 if (!rtx_equal_p (out, operands[0]))
19051 emit_move_insn (operands[0], copy_rtx (out));
19057 * General case: Jumpful:
19058 * xorl dest,dest cmpl op1, op2
19059 * cmpl op1, op2 movl ct, dest
19060 * setcc dest jcc 1f
19061 * decl dest movl cf, dest
19062 * andl (cf-ct),dest 1:
19063 * addl ct,dest
19064 *
19065 * Size 20. Size 14.
19067 * This is reasonably steep, but branch mispredict costs are
19068 * high on modern cpus, so consider failing only if optimizing
19069 * for space.
19070 */
19072 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
19073 && BRANCH_COST (optimize_insn_for_speed_p (),
19078 enum machine_mode cmp_mode = GET_MODE (op0);
19083 if (SCALAR_FLOAT_MODE_P (cmp_mode))
19085 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
19087 /* We may be reversing an unordered compare to a normal compare,
19088 which is not valid in general (we may convert a non-trapping
19089 condition to a trapping one), however on i386 we currently
19090 emit all comparisons unordered. */
19091 code = reverse_condition_maybe_unordered (code);
19095 code = reverse_condition (code);
19096 if (compare_code != UNKNOWN)
19097 compare_code = reverse_condition (compare_code);
19101 if (compare_code != UNKNOWN)
19103 /* notl op1 (if needed)
19104 sarl $31, op1
19105 andl (cf-ct), op1
19106 addl ct, op1
19108 For x < 0 (resp. x <= -1) there will be no notl,
19109 so if possible swap the constants to get rid of the
19110 complement.
19111 True/false will be -1/0 while code below (store flag
19112 followed by decrement) is 0/-1, so the constants need
19113 to be exchanged once more. */
19115 if (compare_code == GE || !cf)
19117 code = reverse_condition (code);
19122 HOST_WIDE_INT tmp = cf;
19127 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
19131 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
19133 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
19135 copy_rtx (out), 1, OPTAB_DIRECT);
19138 out = expand_simple_binop (mode, AND, copy_rtx (out),
19139 gen_int_mode (cf - ct, mode),
19140 copy_rtx (out), 1, OPTAB_DIRECT);
19142 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
19143 copy_rtx (out), 1, OPTAB_DIRECT);
19144 if (!rtx_equal_p (out, operands[0]))
19145 emit_move_insn (operands[0], copy_rtx (out));
19151 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
19153 /* Try a few things more with specific constants and a variable. */
19156 rtx var, orig_out, out, tmp;
19158 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19161 /* If one of the two operands is an interesting constant, load a
19162 constant with the above and mask it in with a logical operation. */
19164 if (CONST_INT_P (operands[2]))
19167 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
19168 operands[3] = constm1_rtx, op = and_optab;
19169 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
19170 operands[3] = const0_rtx, op = ior_optab;
19174 else if (CONST_INT_P (operands[3]))
19177 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
19178 operands[2] = constm1_rtx, op = and_optab;
19179 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
19180 operands[2] = const0_rtx, op = ior_optab;
19187 orig_out = operands[0];
19188 tmp = gen_reg_rtx (mode);
19191 /* Recurse to get the constant loaded. */
19192 if (ix86_expand_int_movcc (operands) == 0)
19195 /* Mask in the interesting variable. */
19196 out = expand_binop (mode, op, var, tmp, orig_out, 0,
19198 if (!rtx_equal_p (out, orig_out))
19199 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
19204 /*
19205 * For comparison with above,
19206 *
19207 * movl cf,dest
19208 * movl ct,tmp
19209 * cmpl op1,op2
19210 * cmovcc tmp,dest
19211 *
19212 * Size 15.
19213 */
19215 if (! nonimmediate_operand (operands[2], mode))
19216 operands[2] = force_reg (mode, operands[2]);
19217 if (! nonimmediate_operand (operands[3], mode))
19218 operands[3] = force_reg (mode, operands[3]);
19220 if (! register_operand (operands[2], VOIDmode)
19222 || ! register_operand (operands[3], VOIDmode)))
19223 operands[2] = force_reg (mode, operands[2]);
19226 && ! register_operand (operands[3], VOIDmode))
19227 operands[3] = force_reg (mode, operands[3]);
19229 emit_insn (compare_seq);
19230 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19231 gen_rtx_IF_THEN_ELSE (mode,
19232 compare_op, operands[2],
19237 /* Swap, force into registers, or otherwise massage the two operands
19238 to an sse comparison with a mask result. Thus we differ a bit from
19239 ix86_prepare_fp_compare_args which expects to produce a flags result.
19241 The DEST operand exists to help determine whether to commute commutative
19242 operators. The POP0/POP1 operands are updated in place. The new
19243 comparison code is returned, or UNKNOWN if not implementable. */
19245 static enum rtx_code
19246 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
19247 rtx *pop0, rtx *pop1)
19255 /* AVX supports all the needed comparisons. */
19258 /* We have no LTGT as an operator. We could implement it with
19259 NE & ORDERED, but this requires an extra temporary. It's
19260 not clear that it's worth it. */
19267 /* These are supported directly. */
19274 /* AVX has 3 operand comparisons, no need to swap anything. */
19277 /* For commutative operators, try to canonicalize the destination
19278 operand to be first in the comparison - this helps reload to
19279 avoid extra moves. */
19280 if (!dest || !rtx_equal_p (dest, *pop1))
19288 /* These are not supported directly before AVX, and furthermore
19289 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19290 comparison operands to transform into something that is
19291 supported. */
19295 code = swap_condition (code);
19299 gcc_unreachable ();
19305 /* Detect conditional moves that exactly match min/max operational
19306 semantics. Note that this is IEEE safe, as long as we don't
19307 interchange the operands.
19309 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19310 and TRUE if the operation is successful and instructions are emitted. */
19313 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
19314 rtx cmp_op1, rtx if_true, rtx if_false)
19316 enum machine_mode mode;
19322 else if (code == UNGE)
19325 if_true = if_false;
19331 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
19333 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
19338 mode = GET_MODE (dest);
19340 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19341 but MODE may be a vector mode and thus not appropriate. */
19342 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
19344 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
19347 if_true = force_reg (mode, if_true);
19348 v = gen_rtvec (2, if_true, if_false);
19349 tmp = gen_rtx_UNSPEC (mode, v, u);
19353 code = is_min ? SMIN : SMAX;
19354 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
19357 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
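/* E.g. "x < y ? x : y" (code LT, if_true == cmp_op0, if_false ==
   cmp_op1) maps to minss/minps.  The IEEE subtlety is that the SSE
   min/max instructions return their second operand when the operands
   compare unordered, so the operand order established here must be
   preserved.  */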
19361 /* Expand an sse vector comparison. Return the register with the result. */
19364 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
19365 rtx op_true, rtx op_false)
19367 enum machine_mode mode = GET_MODE (dest);
19368 enum machine_mode cmp_mode = GET_MODE (cmp_op0);
19371 cmp_op0 = force_reg (cmp_mode, cmp_op0);
19372 if (!nonimmediate_operand (cmp_op1, cmp_mode))
19373 cmp_op1 = force_reg (cmp_mode, cmp_op1);
19376 || reg_overlap_mentioned_p (dest, op_true)
19377 || reg_overlap_mentioned_p (dest, op_false))
19378 dest = gen_reg_rtx (mode);
19380 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
19381 if (cmp_mode != mode)
19383 x = force_reg (cmp_mode, x);
19384 convert_move (dest, x, false);
19387 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19392 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19393 operations. This is used for both scalar and vector conditional moves. */
19396 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
19398 enum machine_mode mode = GET_MODE (dest);
19401 if (vector_all_ones_operand (op_true, mode)
19402 && rtx_equal_p (op_false, CONST0_RTX (mode)))
19404 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
19406 else if (op_false == CONST0_RTX (mode))
19408 op_true = force_reg (mode, op_true);
19409 x = gen_rtx_AND (mode, cmp, op_true);
19410 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19412 else if (op_true == CONST0_RTX (mode))
19414 op_false = force_reg (mode, op_false);
19415 x = gen_rtx_NOT (mode, cmp);
19416 x = gen_rtx_AND (mode, x, op_false);
19417 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19419 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
19421 op_false = force_reg (mode, op_false);
19422 x = gen_rtx_IOR (mode, cmp, op_false);
19423 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19425 else if (TARGET_XOP)
19427 op_true = force_reg (mode, op_true);
19429 if (!nonimmediate_operand (op_false, mode))
19430 op_false = force_reg (mode, op_false);
19432 emit_insn (gen_rtx_SET (mode, dest,
19433 gen_rtx_IF_THEN_ELSE (mode, cmp,
19439 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
19441 if (!nonimmediate_operand (op_true, mode))
19442 op_true = force_reg (mode, op_true);
19444 op_false = force_reg (mode, op_false);
19450 gen = gen_sse4_1_blendvps;
19454 gen = gen_sse4_1_blendvpd;
19462 gen = gen_sse4_1_pblendvb;
19463 dest = gen_lowpart (V16QImode, dest);
19464 op_false = gen_lowpart (V16QImode, op_false);
19465 op_true = gen_lowpart (V16QImode, op_true);
19466 cmp = gen_lowpart (V16QImode, cmp);
19471 gen = gen_avx_blendvps256;
19475 gen = gen_avx_blendvpd256;
19483 gen = gen_avx2_pblendvb;
19484 dest = gen_lowpart (V32QImode, dest);
19485 op_false = gen_lowpart (V32QImode, op_false);
19486 op_true = gen_lowpart (V32QImode, op_true);
19487 cmp = gen_lowpart (V32QImode, cmp);
19495 emit_insn (gen (dest, op_false, op_true, cmp));
19498 op_true = force_reg (mode, op_true);
19500 t2 = gen_reg_rtx (mode);
19502 t3 = gen_reg_rtx (mode);
19506 x = gen_rtx_AND (mode, op_true, cmp);
19507 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
19509 x = gen_rtx_NOT (mode, cmp);
19510 x = gen_rtx_AND (mode, x, op_false);
19511 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
19513 x = gen_rtx_IOR (mode, t3, t2);
19514 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
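/* The generic fallback above computes the classic mask select
   (cmp & op_true) | (~cmp & op_false); the earlier special cases
   merely drop terms when one arm is all zeros or all ones.  */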
19519 /* Expand a floating-point conditional move. Return true if successful. */
19522 ix86_expand_fp_movcc (rtx operands[])
19524 enum machine_mode mode = GET_MODE (operands[0]);
19525 enum rtx_code code = GET_CODE (operands[1]);
19526 rtx tmp, compare_op;
19527 rtx op0 = XEXP (operands[1], 0);
19528 rtx op1 = XEXP (operands[1], 1);
19530 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
19532 enum machine_mode cmode;
19534 /* Since we've no cmove for sse registers, don't force bad register
19535 allocation just to gain access to it. Deny movcc when the
19536 comparison mode doesn't match the move mode. */
19537 cmode = GET_MODE (op0);
19538 if (cmode == VOIDmode)
19539 cmode = GET_MODE (op1);
19543 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
19544 if (code == UNKNOWN)
19547 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
19548 operands[2], operands[3]))
19551 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
19552 operands[2], operands[3]);
19553 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
19557 /* The floating point conditional move instructions don't directly
19558 support conditions resulting from a signed integer comparison. */
19560 compare_op = ix86_expand_compare (code, op0, op1);
19561 if (!fcmov_comparison_operator (compare_op, VOIDmode))
19563 tmp = gen_reg_rtx (QImode);
19564 ix86_expand_setcc (tmp, code, op0, op1);
19566 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
19569 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19570 gen_rtx_IF_THEN_ELSE (mode, compare_op,
19571 operands[2], operands[3])));
19576 /* Expand a floating-point vector conditional move; a vcond operation
19577 rather than a movcc operation. */
19580 ix86_expand_fp_vcond (rtx operands[])
19582 enum rtx_code code = GET_CODE (operands[3]);
19585 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
19586 &operands[4], &operands[5]);
19587 if (code == UNKNOWN)
19590 switch (GET_CODE (operands[3]))
19593 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
19594 operands[5], operands[0], operands[0]);
19595 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
19596 operands[5], operands[1], operands[2]);
19600 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
19601 operands[5], operands[0], operands[0]);
19602 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
19603 operands[5], operands[1], operands[2]);
19607 gcc_unreachable ();
19609 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
19611 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19615 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
19616 operands[5], operands[1], operands[2]))
19619 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
19620 operands[1], operands[2]);
19621 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19625 /* Expand a signed/unsigned integral vector conditional move. */
19628 ix86_expand_int_vcond (rtx operands[])
19630 enum machine_mode data_mode = GET_MODE (operands[0]);
19631 enum machine_mode mode = GET_MODE (operands[4]);
19632 enum rtx_code code = GET_CODE (operands[3]);
19633 bool negate = false;
19636 cop0 = operands[4];
19637 cop1 = operands[5];
19639 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
19640 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
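/* E.g. in V4SImode "x < 0 ? -1 : 0" is a per-element arithmetic
   shift right by 31 (the sign bit fills the element), while
   "x < 0 ? 1 : 0" is a logical shift right by 31 (leaving only the
   sign bit).  */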
19641 if ((code == LT || code == GE)
19642 && data_mode == mode
19643 && cop1 == CONST0_RTX (mode)
19644 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
19645 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
19646 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
19647 && (GET_MODE_SIZE (data_mode) == 16
19648 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
19650 rtx negop = operands[2 - (code == LT)];
19651 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
19652 if (negop == CONST1_RTX (data_mode))
19654 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
19655 operands[0], 1, OPTAB_DIRECT);
19656 if (res != operands[0])
19657 emit_move_insn (operands[0], res);
19660 else if (GET_MODE_INNER (data_mode) != DImode
19661 && vector_all_ones_operand (negop, data_mode))
19663 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
19664 operands[0], 0, OPTAB_DIRECT);
19665 if (res != operands[0])
19666 emit_move_insn (operands[0], res);
19671 if (!nonimmediate_operand (cop1, mode))
19672 cop1 = force_reg (mode, cop1);
19673 if (!general_operand (operands[1], data_mode))
19674 operands[1] = force_reg (data_mode, operands[1]);
19675 if (!general_operand (operands[2], data_mode))
19676 operands[2] = force_reg (data_mode, operands[2]);
19678 /* XOP supports all of the comparisons on all 128-bit vector int types. */
19680 && (mode == V16QImode || mode == V8HImode
19681 || mode == V4SImode || mode == V2DImode))
19685 /* Canonicalize the comparison to EQ, GT, GTU. */
19696 code = reverse_condition (code);
19702 code = reverse_condition (code);
19708 code = swap_condition (code);
19709 x = cop0, cop0 = cop1, cop1 = x;
19713 gcc_unreachable ();
19716 /* Only SSE4.1/SSE4.2 support V2DImode. */
19717 if (mode == V2DImode)
19722 /* SSE4.1 supports EQ. */
19723 if (!TARGET_SSE4_1)
19729 /* SSE4.2 supports GT/GTU. */
19730 if (!TARGET_SSE4_2)
19735 gcc_unreachable ();
19739 /* Unsigned parallel compare is not supported by the hardware.
19740 Play some tricks to turn this into a signed comparison
19741 against 0. */
19744 cop0 = force_reg (mode, cop0);
19754 rtx (*gen_sub3) (rtx, rtx, rtx);
19756 switch (mode)
19757 {
19758 case V8SImode: gen_sub3 = gen_subv8si3; break;
19759 case V4DImode: gen_sub3 = gen_subv4di3; break;
19760 case V4SImode: gen_sub3 = gen_subv4si3; break;
19761 case V2DImode: gen_sub3 = gen_subv2di3; break;
19763 gcc_unreachable ();
19764 }
19765 /* Subtract (-(INT MAX) - 1) from both operands to make
19766 them signed. */
19767 mask = ix86_build_signbit_mask (mode, true, false);
19768 t1 = gen_reg_rtx (mode);
19769 emit_insn (gen_sub3 (t1, cop0, mask));
19771 t2 = gen_reg_rtx (mode);
19772 emit_insn (gen_sub3 (t2, cop1, mask));
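/* With 32-bit elements, for instance, subtracting 0x80000000 changes
   only the top bit of each element (mod 2^32 it equals an xor with
   0x80000000), and flipping the sign bits maps unsigned order onto
   signed order: 0xffffffff > 1 unsigned becomes 0x7fffffff >
   0x80000001 signed, which still holds.  */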
19784 /* Perform a parallel unsigned saturating subtraction. */
19785 x = gen_reg_rtx (mode);
19786 emit_insn (gen_rtx_SET (VOIDmode, x,
19787 gen_rtx_US_MINUS (mode, cop0, cop1)));
19790 cop1 = CONST0_RTX (mode);
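/* us_minus computes max (cop0 - cop1, 0) with unsigned saturation,
   so the result is nonzero iff cop0 > cop1 as unsigned values (e.g.
   3 -us 5 = 0 but 5 -us 3 = 2); the GTU test can then proceed, in
   effect, as an equality test of the difference against zero with
   the select arms swapped.  */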
19796 gcc_unreachable ();
19801 /* Allow the comparison to be done in one mode, but the movcc to
19802 happen in another mode. */
19803 if (data_mode == mode)
19805 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19806 operands[1+negate], operands[2-negate]);
19810 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
19811 x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
19813 operands[1+negate], operands[2-negate]);
19814 x = gen_lowpart (data_mode, x);
19817 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19818 operands[2-negate]);
19822 /* Expand a variable vector permutation. */
19825 ix86_expand_vec_perm (rtx operands[])
19827 rtx target = operands[0];
19828 rtx op0 = operands[1];
19829 rtx op1 = operands[2];
19830 rtx mask = operands[3];
19831 rtx t1, t2, t3, t4, vt, vt2, vec[32];
19832 enum machine_mode mode = GET_MODE (op0);
19833 enum machine_mode maskmode = GET_MODE (mask);
19835 bool one_operand_shuffle = rtx_equal_p (op0, op1);
19837 /* Number of elements in the vector. */
19838 w = GET_MODE_NUNITS (mode);
19839 e = GET_MODE_UNIT_SIZE (mode);
19840 gcc_assert (w <= 32);
19844 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
19846 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
19847 a constant shuffle operand. With a tiny bit of effort we can
19848 use VPERMD instead. A re-interpretation stall for V4DFmode is
19849 unfortunate but there's no avoiding it.
19850 Similarly for V16HImode we don't have instructions for variable
19851 shuffling, while for V32QImode we can, after preparing suitable
19852 masks, use vpshufb; vpshufb; vpermq; vpor. */
19854 if (mode == V16HImode)
19856 maskmode = mode = V32QImode;
19862 maskmode = mode = V8SImode;
19866 t1 = gen_reg_rtx (maskmode);
19868 /* Replicate the low bits of the V4DImode mask into V8SImode:
19869 mask = { A B C D }
19870 t1 = { A A B B C C D D }. */
19871 for (i = 0; i < w / 2; ++i)
19872 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
19873 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19874 vt = force_reg (maskmode, vt);
19875 mask = gen_lowpart (maskmode, mask);
19876 if (maskmode == V8SImode)
19877 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
19879 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
19881 /* Multiply the shuffle indices by two. */
19882 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
19885 /* Add one to the odd shuffle indices:
19886 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
19887 for (i = 0; i < w / 2; ++i)
19889 vec[i * 2] = const0_rtx;
19890 vec[i * 2 + 1] = const1_rtx;
19892 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19893 vt = force_const_mem (maskmode, vt);
19894 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
19897 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
19898 operands[3] = mask = t1;
19899 target = gen_lowpart (mode, target);
19900 op0 = gen_lowpart (mode, op0);
19901 op1 = gen_lowpart (mode, op1);
19907 /* The VPERMD and VPERMPS instructions already properly ignore
19908 the high bits of the shuffle elements. No need for us to
19909 perform an AND ourselves. */
19910 if (one_operand_shuffle)
19911 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
19914 t1 = gen_reg_rtx (V8SImode);
19915 t2 = gen_reg_rtx (V8SImode);
19916 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
19917 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
19923 mask = gen_lowpart (V8SFmode, mask);
19924 if (one_operand_shuffle)
19925 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
19928 t1 = gen_reg_rtx (V8SFmode);
19929 t2 = gen_reg_rtx (V8SFmode);
19930 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
19931 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
19937 /* By combining the two 128-bit input vectors into one 256-bit
19938 input vector, we can use VPERMD and VPERMPS for the full
19939 two-operand shuffle. */
19940 t1 = gen_reg_rtx (V8SImode);
19941 t2 = gen_reg_rtx (V8SImode);
19942 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
19943 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
19944 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
19945 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
19949 t1 = gen_reg_rtx (V8SFmode);
19950 t2 = gen_reg_rtx (V8SImode);
19951 mask = gen_lowpart (V4SImode, mask);
19952 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
19953 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
19954 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
19955 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
19959 t1 = gen_reg_rtx (V32QImode);
19960 t2 = gen_reg_rtx (V32QImode);
19961 t3 = gen_reg_rtx (V32QImode);
19962 vt2 = GEN_INT (128);
19963 for (i = 0; i < 32; i++)
19964 vec[i] = vt2;
19965 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
19966 vt = force_reg (V32QImode, vt);
19967 for (i = 0; i < 32; i++)
19968 vec[i] = i < 16 ? vt2 : const0_rtx;
19969 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
19970 vt2 = force_reg (V32QImode, vt2);
19971 /* From mask create two adjusted masks, which contain the same
19972 bits as mask in the low 7 bits of each vector element.
19973 The first mask will have the most significant bit clear
19974 if it requests element from the same 128-bit lane
19975 and MSB set if it requests element from the other 128-bit lane.
19976 The second mask will have the opposite values of the MSB,
19977 and additionally will have its 128-bit lanes swapped.
19978 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
19979 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
19980 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
19981 stands for the other 12 bytes. */
19982 /* The bit that tells whether an element comes from the same lane or from
19983 the other lane is bit 4, so shift it up by 3 to the MSB position. */
19984 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
19985 gen_lowpart (V4DImode, mask),
19987 /* Clear MSB bits from the mask just in case it had them set. */
19988 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
19989 /* After this t1 will have the MSB set for elements from the same lane. */
19990 emit_insn (gen_xorv32qi3 (t1, t1, vt2));
19991 /* Clear bits other than MSB. */
19992 emit_insn (gen_andv32qi3 (t1, t1, vt));
19993 /* Or in the lower bits from mask into t3. */
19994 emit_insn (gen_iorv32qi3 (t3, t1, t2));
19995 /* And invert MSB bits in t1, so MSB is set for elements from the other lane. */
19997 emit_insn (gen_xorv32qi3 (t1, t1, vt));
19998 /* Swap 128-bit lanes in t3. */
19999 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20000 gen_lowpart (V4DImode, t3),
20001 const2_rtx, GEN_INT (3),
20002 const0_rtx, const1_rtx));
20003 /* And or in the lower bits from mask into t1. */
20004 emit_insn (gen_iorv32qi3 (t1, t1, t2));
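/* Tracing the example byte 12 (position 1, low lane) through the steps
   above: 12 << 3 == 90, putting the lane bit in the MSB; the xor with
   vt2 == { 80 x16 | 00 x16 } gives 10; the and with vt == { 80 x32 }
   gives 00, so the t3 byte is 00 | 12 == 12 (MSB clear, used after the
   lane swap); the final xor with vt sets the MSB again, so the t1 byte
   is 80 | 12 == 92, which vpshufb zeroes on the unswapped operand.  */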
20005 if (one_operand_shuffle)
20007 /* Each of these shuffles will put 0s in places where an
20008 element from the other 128-bit lane is needed; otherwise
20009 it will shuffle in the requested value. */
20010 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
20011 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
20012 /* For t3 the 128-bit lanes are swapped again. */
20013 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20014 gen_lowpart (V4DImode, t3),
20015 const2_rtx, GEN_INT (3),
20016 const0_rtx, const1_rtx));
20017 /* And oring both together leads to the result. */
20018 emit_insn (gen_iorv32qi3 (target, t1, t3));
20022 t4 = gen_reg_rtx (V32QImode);
20023 /* Similar to the above one_operand_shuffle code, but
20024 repeated twice for each operand. The merge_two: code
20025 below will merge the two results together. */
20026 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
20027 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
20028 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
20029 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
20030 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
20031 gen_lowpart (V4DImode, t4),
20032 const2_rtx, GEN_INT (3),
20033 const0_rtx, const1_rtx));
20034 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20035 gen_lowpart (V4DImode, t3),
20036 const2_rtx, GEN_INT (3),
20037 const0_rtx, const1_rtx));
20038 emit_insn (gen_iorv32qi3 (t4, t2, t4));
20039 emit_insn (gen_iorv32qi3 (t3, t1, t3));
20045 gcc_assert (GET_MODE_SIZE (mode) <= 16);
20052 /* The XOP VPPERM insn supports three inputs. By ignoring the
20053 one_operand_shuffle special case, we avoid creating another
20054 set of constant vectors in memory. */
20055 one_operand_shuffle = false;
20057 /* mask = mask & {2*w-1, ...} */
20058 vt = GEN_INT (2*w - 1);
20062 /* mask = mask & {w-1, ...} */
20063 vt = GEN_INT (w - 1);
20066 for (i = 0; i < w; i++)
20068 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20069 mask = expand_simple_binop (maskmode, AND, mask, vt,
20070 NULL_RTX, 0, OPTAB_DIRECT);
20072 /* For non-QImode operations, convert the word permutation control
20073 into a byte permutation control. */
20074 if (mode != V16QImode)
20076 mask = expand_simple_binop (maskmode, ASHIFT, mask,
20077 GEN_INT (exact_log2 (e)),
20078 NULL_RTX, 0, OPTAB_DIRECT);
20080 /* Convert mask to vector of chars. */
20081 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
20083 /* Replicate each of the input bytes into byte positions:
20084 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20085 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20086 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20087 for (i = 0; i < 16; ++i)
20088 vec[i] = GEN_INT (i/e * e);
20089 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20090 vt = force_const_mem (V16QImode, vt);
20092 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
20094 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
20096 /* Convert it into the byte positions by doing
20097 mask = mask + {0,1,..,16/w-1, 0,1,..,16/w-1, ...}. */
20098 for (i = 0; i < 16; ++i)
20099 vec[i] = GEN_INT (i % e);
20100 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20101 vt = force_const_mem (V16QImode, vt);
20102 emit_insn (gen_addv16qi3 (mask, mask, vt));
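/* On the SSSE3 path, for example, the V4SImode control { 1 0 3 2 } is
   shifted left by 2 into the word values { 4 0 12 8 }, replicated to
   the bytes { 4 4 4 4, 0 0 0 0, 12 12 12 12, 8 8 8 8 }, and after the
   { 0 1 2 3, ... } addition becomes the byte control
   { 4 5 6 7, 0 1 2 3, 12 13 14 15, 8 9 10 11 }.  */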
20105 /* The actual shuffle operations all operate on V16QImode. */
20106 op0 = gen_lowpart (V16QImode, op0);
20107 op1 = gen_lowpart (V16QImode, op1);
20108 target = gen_lowpart (V16QImode, target);
20112 emit_insn (gen_xop_pperm (target, op0, op1, mask));
20114 else if (one_operand_shuffle)
20116 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
20123 /* Shuffle the two input vectors independently. */
20124 t1 = gen_reg_rtx (V16QImode);
20125 t2 = gen_reg_rtx (V16QImode);
20126 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
20127 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
20130 /* Then merge them together. The key is whether any given control
20131 element contained a bit set that indicates the second input vector. */
20132 mask = operands[3];
20134 if (maskmode == V2DImode && !TARGET_SSE4_1)
20136 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
20137 more shuffle to convert the V2DI input mask into a V4SI
20138 input mask. At that point the masking that expand_int_vcond
20139 performs will work as desired. */
20140 rtx t3 = gen_reg_rtx (V4SImode);
20141 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
20142 const0_rtx, const0_rtx,
20143 const2_rtx, const2_rtx));
20145 maskmode = V4SImode;
20149 for (i = 0; i < w; i++)
20151 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20152 vt = force_reg (maskmode, vt);
20153 mask = expand_simple_binop (maskmode, AND, mask, vt,
20154 NULL_RTX, 0, OPTAB_DIRECT);
20156 xops[0] = gen_lowpart (mode, operands[0]);
20157 xops[1] = gen_lowpart (mode, t2);
20158 xops[2] = gen_lowpart (mode, t1);
20159 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
20162 ok = ix86_expand_int_vcond (xops);
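/* In scalar terms, the vcond merge computes for each element i
     target[i] = (mask[i] & w) ? t2[i] : t1[i];
   selecting between the two independently shuffled results on the bit
   of the original control that requested an element from the second
   input vector.  */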
20167 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20168 true if we should do zero extension, else sign extension. HIGH_P is
20169 true if we want the N/2 high elements, else the low elements. */
20172 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
20174 enum machine_mode imode = GET_MODE (operands[1]);
20179 rtx (*unpack)(rtx, rtx);
20180 rtx (*extract)(rtx, rtx) = NULL;
20181 enum machine_mode halfmode = BLKmode;
20187 unpack = gen_avx2_zero_extendv16qiv16hi2;
20189 unpack = gen_avx2_sign_extendv16qiv16hi2;
20190 halfmode = V16QImode;
20192 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
20196 unpack = gen_avx2_zero_extendv8hiv8si2;
20198 unpack = gen_avx2_sign_extendv8hiv8si2;
20199 halfmode = V8HImode;
20201 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
20205 unpack = gen_avx2_zero_extendv4siv4di2;
20207 unpack = gen_avx2_sign_extendv4siv4di2;
20208 halfmode = V4SImode;
20210 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
20214 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
20216 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
20220 unpack = gen_sse4_1_zero_extendv4hiv4si2;
20222 unpack = gen_sse4_1_sign_extendv4hiv4si2;
20226 unpack = gen_sse4_1_zero_extendv2siv2di2;
20228 unpack = gen_sse4_1_sign_extendv2siv2di2;
20231 gcc_unreachable ();
20234 if (GET_MODE_SIZE (imode) == 32)
20236 tmp = gen_reg_rtx (halfmode);
20237 emit_insn (extract (tmp, operands[1]));
20241 /* Shift higher 8 bytes to lower 8 bytes. */
20242 tmp = gen_reg_rtx (imode);
20243 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
20244 gen_lowpart (V1TImode, operands[1]),
20250 emit_insn (unpack (operands[0], tmp));
20254 rtx (*unpack)(rtx, rtx, rtx);
20260 unpack = gen_vec_interleave_highv16qi;
20262 unpack = gen_vec_interleave_lowv16qi;
20266 unpack = gen_vec_interleave_highv8hi;
20268 unpack = gen_vec_interleave_lowv8hi;
20272 unpack = gen_vec_interleave_highv4si;
20274 unpack = gen_vec_interleave_lowv4si;
20277 gcc_unreachable ();
20280 dest = gen_lowpart (imode, operands[0]);
20283 tmp = force_reg (imode, CONST0_RTX (imode));
20285 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
20286 operands[1], pc_rtx, pc_rtx);
20288 emit_insn (unpack (dest, operands[1], tmp));
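/* The interleave forms each widened element in place: with tmp holding
   zero (for zero extension) or the elementwise sign mask computed by
   the GT comparison above (for sign extension), interleaving the low
   halves of { x0 x1 x2 x3 ... } and { s0 s1 s2 s3 ... } yields
   { x0 s0 x1 s1 ... }, which read in the wider mode is
   { ext(x0) ext(x1) ... } on a little-endian target.  */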
20292 /* Expand conditional increment or decrement using adc/sbb instructions.
20293 The default case, using setcc followed by a conditional move, can be
20294 done by generic code. */
20296 ix86_expand_int_addcc (rtx operands[])
20298 enum rtx_code code = GET_CODE (operands[1]);
20300 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
20302 rtx val = const0_rtx;
20303 bool fpcmp = false;
20304 enum machine_mode mode;
20305 rtx op0 = XEXP (operands[1], 0);
20306 rtx op1 = XEXP (operands[1], 1);
20308 if (operands[3] != const1_rtx
20309 && operands[3] != constm1_rtx)
20311 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20313 code = GET_CODE (compare_op);
20315 flags = XEXP (compare_op, 0);
20317 if (GET_MODE (flags) == CCFPmode
20318 || GET_MODE (flags) == CCFPUmode)
20321 code = ix86_fp_compare_code_to_integer (code);
20328 PUT_CODE (compare_op,
20329 reverse_condition_maybe_unordered
20330 (GET_CODE (compare_op)));
20332 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
20335 mode = GET_MODE (operands[0]);
20337 /* Construct either adc or sbb insn. */
20338 if ((code == LTU) == (operands[3] == constm1_rtx))
20343 insn = gen_subqi3_carry;
20346 insn = gen_subhi3_carry;
20349 insn = gen_subsi3_carry;
20352 insn = gen_subdi3_carry;
20355 gcc_unreachable ();
20363 insn = gen_addqi3_carry;
20366 insn = gen_addhi3_carry;
20369 insn = gen_addsi3_carry;
20372 insn = gen_adddi3_carry;
20375 gcc_unreachable ();
20378 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
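/* For the common idiom x += (a < b) with unsigned operands this boils
   down to (AT&T syntax sketch):
       cmpl  %ebx, %eax        # CF = (a < b)
       adcl  $0, %ecx          # x += CF
   and the decrement and inverted-condition cases use sbb in the same
   way.  */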
20384 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
20385 but works for floating point parameters and non-offsettable memories.
20386 For pushes, it returns just stack offsets; the values will be saved
20387 in the right order. At most four parts are generated. */
20390 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
20395 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
20397 size = (GET_MODE_SIZE (mode) + 4) / 8;
20399 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
20400 gcc_assert (size >= 2 && size <= 4);
20402 /* Optimize constant pool references to immediates. This is used by fp
20403 moves, which force all constants to memory to allow combining. */
20404 if (MEM_P (operand) && MEM_READONLY_P (operand))
20406 rtx tmp = maybe_get_pool_constant (operand);
20411 if (MEM_P (operand) && !offsettable_memref_p (operand))
20413 /* The only non-offsettable memories we handle are pushes. */
20414 int ok = push_operand (operand, VOIDmode);
20418 operand = copy_rtx (operand);
20419 PUT_MODE (operand, Pmode);
20420 parts[0] = parts[1] = parts[2] = parts[3] = operand;
20424 if (GET_CODE (operand) == CONST_VECTOR)
20426 enum machine_mode imode = int_mode_for_mode (mode);
20427 /* Caution: if we looked through a constant pool memory above,
20428 the operand may actually have a different mode now. That's
20429 ok, since we want to pun this all the way back to an integer. */
20430 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
20431 gcc_assert (operand != NULL);
20437 if (mode == DImode)
20438 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
20443 if (REG_P (operand))
20445 gcc_assert (reload_completed);
20446 for (i = 0; i < size; i++)
20447 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
20449 else if (offsettable_memref_p (operand))
20451 operand = adjust_address (operand, SImode, 0);
20452 parts[0] = operand;
20453 for (i = 1; i < size; i++)
20454 parts[i] = adjust_address (operand, SImode, 4 * i);
20456 else if (GET_CODE (operand) == CONST_DOUBLE)
20461 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
20465 real_to_target (l, &r, mode);
20466 parts[3] = gen_int_mode (l[3], SImode);
20467 parts[2] = gen_int_mode (l[2], SImode);
20470 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
20471 parts[2] = gen_int_mode (l[2], SImode);
20474 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
20477 gcc_unreachable ();
20479 parts[1] = gen_int_mode (l[1], SImode);
20480 parts[0] = gen_int_mode (l[0], SImode);
20483 gcc_unreachable ();
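/* For instance, the CONST_DOUBLE case above splits the DFmode constant
   1.0 into the SImode parts { 0x00000000, 0x3ff00000 }, the low and
   high halves of the IEEE 754 bit pattern 0x3ff0000000000000.  */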
20488 if (mode == TImode)
20489 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
20490 if (mode == XFmode || mode == TFmode)
20492 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
20493 if (REG_P (operand))
20495 gcc_assert (reload_completed);
20496 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
20497 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
20499 else if (offsettable_memref_p (operand))
20501 operand = adjust_address (operand, DImode, 0);
20502 parts[0] = operand;
20503 parts[1] = adjust_address (operand, upper_mode, 8);
20505 else if (GET_CODE (operand) == CONST_DOUBLE)
20510 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
20511 real_to_target (l, &r, mode);
20513 /* Do not use shift by 32 to avoid warning on 32bit systems. */
20514 if (HOST_BITS_PER_WIDE_INT >= 64)
20517 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
20518 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
20521 parts[0] = immed_double_const (l[0], l[1], DImode);
20523 if (upper_mode == SImode)
20524 parts[1] = gen_int_mode (l[2], SImode);
20525 else if (HOST_BITS_PER_WIDE_INT >= 64)
20528 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
20529 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
20532 parts[1] = immed_double_const (l[2], l[3], DImode);
20535 gcc_unreachable ();
20542 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
20543 Return false when normal moves are needed; true when all required
20544 insns have been emitted. Operands 2-5 contain the input values
20545 in the correct order; operands 6-9 contain the output values. */
20548 ix86_split_long_move (rtx operands[])
20553 int collisions = 0;
20554 enum machine_mode mode = GET_MODE (operands[0]);
20555 bool collisionparts[4];
20557 /* The DFmode expanders may ask us to move a double.
20558 For a 64-bit target this is a single move. By hiding that fact
20559 here we simplify the i386.md splitters. */
20560 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
20562 /* Optimize constant pool references to immediates. This is used by
20563 fp moves, which force all constants to memory to allow combining. */
20565 if (MEM_P (operands[1])
20566 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
20567 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
20568 operands[1] = get_pool_constant (XEXP (operands[1], 0));
20569 if (push_operand (operands[0], VOIDmode))
20571 operands[0] = copy_rtx (operands[0]);
20572 PUT_MODE (operands[0], Pmode);
20575 operands[0] = gen_lowpart (DImode, operands[0]);
20576 operands[1] = gen_lowpart (DImode, operands[1]);
20577 emit_move_insn (operands[0], operands[1]);
20581 /* The only non-offsettable memory we handle is a push. */
20582 if (push_operand (operands[0], VOIDmode))
20585 gcc_assert (!MEM_P (operands[0])
20586 || offsettable_memref_p (operands[0]));
20588 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
20589 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
20591 /* When emitting a push, take care of source operands on the stack. */
20592 if (push && MEM_P (operands[1])
20593 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
20595 rtx src_base = XEXP (part[1][nparts - 1], 0);
20597 /* Compensate for the stack decrement by 4. */
20598 if (!TARGET_64BIT && nparts == 3
20599 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
20600 src_base = plus_constant (src_base, 4);
20602 /* src_base refers to the stack pointer and is
20603 automatically decreased by emitted push. */
20604 for (i = 0; i < nparts; i++)
20605 part[1][i] = change_address (part[1][i],
20606 GET_MODE (part[1][i]), src_base);
20609 /* We need to do the copy in the right order in case an address register
20610 of the source overlaps the destination. */
20611 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
20615 for (i = 0; i < nparts; i++)
20618 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
20619 if (collisionparts[i])
20623 /* Collision in the middle part can be handled by reordering. */
20624 if (collisions == 1 && nparts == 3 && collisionparts [1])
20626 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20627 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20629 else if (collisions == 1
20631 && (collisionparts [1] || collisionparts [2]))
20633 if (collisionparts [1])
20635 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20636 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20640 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
20641 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
20645 /* If there are more collisions, we can't handle it by reordering.
20646 Do an lea to the last part and use only one colliding move. */
20647 else if (collisions > 1)
20653 base = part[0][nparts - 1];
20655 /* Handle the case when the last part isn't valid for lea.
20656 Happens in 64-bit mode storing the 12-byte XFmode. */
20657 if (GET_MODE (base) != Pmode)
20658 base = gen_rtx_REG (Pmode, REGNO (base));
20660 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
20661 part[1][0] = replace_equiv_address (part[1][0], base);
20662 for (i = 1; i < nparts; i++)
20664 tmp = plus_constant (base, UNITS_PER_WORD * i);
20665 part[1][i] = replace_equiv_address (part[1][i], tmp);
20676 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
20677 emit_insn (gen_addsi3 (stack_pointer_rtx,
20678 stack_pointer_rtx, GEN_INT (-4)));
20679 emit_move_insn (part[0][2], part[1][2]);
20681 else if (nparts == 4)
20683 emit_move_insn (part[0][3], part[1][3]);
20684 emit_move_insn (part[0][2], part[1][2]);
20689 /* In 64-bit mode we don't have a 32-bit push available. In case this is
20690 a register, that is OK - we will just use the larger counterpart. We also
20691 retype memory - these come from an attempt to avoid a REX prefix on
20692 moving the second half of a TFmode value. */
20693 if (GET_MODE (part[1][1]) == SImode)
20695 switch (GET_CODE (part[1][1]))
20698 part[1][1] = adjust_address (part[1][1], DImode, 0);
20702 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
20706 gcc_unreachable ();
20709 if (GET_MODE (part[1][0]) == SImode)
20710 part[1][0] = part[1][1];
20713 emit_move_insn (part[0][1], part[1][1]);
20714 emit_move_insn (part[0][0], part[1][0]);
20718 /* Choose the correct order so as not to overwrite the source before it is copied. */
20719 if ((REG_P (part[0][0])
20720 && REG_P (part[1][1])
20721 && (REGNO (part[0][0]) == REGNO (part[1][1])
20723 && REGNO (part[0][0]) == REGNO (part[1][2]))
20725 && REGNO (part[0][0]) == REGNO (part[1][3]))))
20727 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
20729 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
20731 operands[2 + i] = part[0][j];
20732 operands[6 + i] = part[1][j];
20737 for (i = 0; i < nparts; i++)
20739 operands[2 + i] = part[0][i];
20740 operands[6 + i] = part[1][i];
20744 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
20745 if (optimize_insn_for_size_p ())
20747 for (j = 0; j < nparts - 1; j++)
20748 if (CONST_INT_P (operands[6 + j])
20749 && operands[6 + j] != const0_rtx
20750 && REG_P (operands[2 + j]))
20751 for (i = j; i < nparts - 1; i++)
20752 if (CONST_INT_P (operands[7 + i])
20753 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
20754 operands[7 + i] = operands[2 + j];
20757 for (i = 0; i < nparts; i++)
20758 emit_move_insn (operands[2 + i], operands[6 + i]);
20763 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
20764 left shift by a constant, either using a single shift or
20765 a sequence of add instructions. */
20768 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
20770 rtx (*insn)(rtx, rtx, rtx);
20773 || (count * ix86_cost->add <= ix86_cost->shift_const
20774 && !optimize_insn_for_size_p ()))
20776 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
20777 while (count-- > 0)
20778 emit_insn (insn (operand, operand, operand));
20782 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20783 emit_insn (insn (operand, operand, GEN_INT (count)));
20788 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
20790 rtx (*gen_ashl3)(rtx, rtx, rtx);
20791 rtx (*gen_shld)(rtx, rtx, rtx);
20792 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20794 rtx low[2], high[2];
20797 if (CONST_INT_P (operands[2]))
20799 split_double_mode (mode, operands, 2, low, high);
20800 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20802 if (count >= half_width)
20804 emit_move_insn (high[0], low[1]);
20805 emit_move_insn (low[0], const0_rtx);
20807 if (count > half_width)
20808 ix86_expand_ashl_const (high[0], count - half_width, mode);
20812 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20814 if (!rtx_equal_p (operands[0], operands[1]))
20815 emit_move_insn (operands[0], operands[1]);
20817 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
20818 ix86_expand_ashl_const (low[0], count, mode);
20823 split_double_mode (mode, operands, 1, low, high);
20825 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20827 if (operands[1] == const1_rtx)
20829 /* Assuming we've chosen QImode-capable registers, 1 << N
20830 can be done with two 32/64-bit shifts, no branches, no cmoves. */
20831 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
20833 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
20835 ix86_expand_clear (low[0]);
20836 ix86_expand_clear (high[0]);
20837 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
20839 d = gen_lowpart (QImode, low[0]);
20840 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20841 s = gen_rtx_EQ (QImode, flags, const0_rtx);
20842 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20844 d = gen_lowpart (QImode, high[0]);
20845 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20846 s = gen_rtx_NE (QImode, flags, const0_rtx);
20847 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20850 /* Otherwise, we can get the same results by manually performing
20851 a bit extract operation on bit 5/6, and then performing the two
20852 shifts. The two methods of getting 0/1 into low/high are exactly
20853 the same size. Avoiding the shift in the bit extract case helps
20854 pentium4 a bit; no one else seems to care much either way. */
20857 enum machine_mode half_mode;
20858 rtx (*gen_lshr3)(rtx, rtx, rtx);
20859 rtx (*gen_and3)(rtx, rtx, rtx);
20860 rtx (*gen_xor3)(rtx, rtx, rtx);
20861 HOST_WIDE_INT bits;
20864 if (mode == DImode)
20866 half_mode = SImode;
20867 gen_lshr3 = gen_lshrsi3;
20868 gen_and3 = gen_andsi3;
20869 gen_xor3 = gen_xorsi3;
20874 half_mode = DImode;
20875 gen_lshr3 = gen_lshrdi3;
20876 gen_and3 = gen_anddi3;
20877 gen_xor3 = gen_xordi3;
20881 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
20882 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
20884 x = gen_lowpart (half_mode, operands[2]);
20885 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
20887 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
20888 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
20889 emit_move_insn (low[0], high[0]);
20890 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
20893 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20894 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
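/* Worked example: DImode 1 << n on a 32-bit target with n == 40 gives
   high = (40 >> 5) & 1 == 1 and low = high ^ 1 == 0; the final shifts
   use only the low five bits of the count (40 & 31 == 8), leaving
   low == 0 and high == 1 << 8, i.e. bit 40 of the double word.  */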
20898 if (operands[1] == constm1_rtx)
20900 /* For -1 << N, we can avoid the shld instruction, because we
20901 know that we're shifting 0...31/63 ones into a -1. */
20902 emit_move_insn (low[0], constm1_rtx);
20903 if (optimize_insn_for_size_p ())
20904 emit_move_insn (high[0], low[0]);
20906 emit_move_insn (high[0], constm1_rtx);
20910 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20912 if (!rtx_equal_p (operands[0], operands[1]))
20913 emit_move_insn (operands[0], operands[1]);
20915 split_double_mode (mode, operands, 1, low, high);
20916 emit_insn (gen_shld (high[0], low[0], operands[2]));
20919 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20921 if (TARGET_CMOVE && scratch)
20923 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20924 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20926 ix86_expand_clear (scratch);
20927 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
20931 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20932 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20934 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
20939 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
20941 rtx (*gen_ashr3)(rtx, rtx, rtx)
20942 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
20943 rtx (*gen_shrd)(rtx, rtx, rtx);
20944 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20946 rtx low[2], high[2];
20949 if (CONST_INT_P (operands[2]))
20951 split_double_mode (mode, operands, 2, low, high);
20952 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20954 if (count == GET_MODE_BITSIZE (mode) - 1)
20956 emit_move_insn (high[0], high[1]);
20957 emit_insn (gen_ashr3 (high[0], high[0],
20958 GEN_INT (half_width - 1)));
20959 emit_move_insn (low[0], high[0]);
20962 else if (count >= half_width)
20964 emit_move_insn (low[0], high[1]);
20965 emit_move_insn (high[0], low[0]);
20966 emit_insn (gen_ashr3 (high[0], high[0],
20967 GEN_INT (half_width - 1)));
20969 if (count > half_width)
20970 emit_insn (gen_ashr3 (low[0], low[0],
20971 GEN_INT (count - half_width)));
20975 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20977 if (!rtx_equal_p (operands[0], operands[1]))
20978 emit_move_insn (operands[0], operands[1]);
20980 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
20981 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
20986 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20988 if (!rtx_equal_p (operands[0], operands[1]))
20989 emit_move_insn (operands[0], operands[1]);
20991 split_double_mode (mode, operands, 1, low, high);
20993 emit_insn (gen_shrd (low[0], high[0], operands[2]));
20994 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
20996 if (TARGET_CMOVE && scratch)
20998 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20999 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21001 emit_move_insn (scratch, high[0]);
21002 emit_insn (gen_ashr3 (scratch, scratch,
21003 GEN_INT (half_width - 1)));
21004 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21009 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
21010 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
21012 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
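/* In the constant case above, e.g. DImode x >> 35 on a 32-bit target
   becomes low = high >> 3 and high = high >> 31, the latter merely
   replicating the sign bit; x >> 63 is special-cased to fill both
   halves with the sign.  */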
21018 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
21020 rtx (*gen_lshr3)(rtx, rtx, rtx)
21021 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
21022 rtx (*gen_shrd)(rtx, rtx, rtx);
21023 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21025 rtx low[2], high[2];
21028 if (CONST_INT_P (operands[2]))
21030 split_double_mode (mode, operands, 2, low, high);
21031 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21033 if (count >= half_width)
21035 emit_move_insn (low[0], high[1]);
21036 ix86_expand_clear (high[0]);
21038 if (count > half_width)
21039 emit_insn (gen_lshr3 (low[0], low[0],
21040 GEN_INT (count - half_width)));
21044 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21046 if (!rtx_equal_p (operands[0], operands[1]))
21047 emit_move_insn (operands[0], operands[1]);
21049 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
21050 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
21055 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21057 if (!rtx_equal_p (operands[0], operands[1]))
21058 emit_move_insn (operands[0], operands[1]);
21060 split_double_mode (mode, operands, 1, low, high);
21062 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21063 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
21065 if (TARGET_CMOVE && scratch)
21067 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21068 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21070 ix86_expand_clear (scratch);
21071 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21076 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
21077 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
21079 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
21084 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
21086 predict_jump (int prob)
21088 rtx insn = get_last_insn ();
21089 gcc_assert (JUMP_P (insn));
21090 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
21093 /* Helper function for the string operations below. Test VARIABLE for
21094 whether it is aligned to VALUE bytes. If it is, jump to the label. */
21096 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
21098 rtx label = gen_label_rtx ();
21099 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
21100 if (GET_MODE (variable) == DImode)
21101 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
21103 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
21104 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
21107 predict_jump (REG_BR_PROB_BASE * 50 / 100);
21109 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21113 /* Decrease COUNTREG by VALUE. */
21115 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
21117 rtx (*gen_add)(rtx, rtx, rtx)
21118 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
21120 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
21123 /* Zero-extend a possibly-SImode EXP to a Pmode register. */
21125 ix86_zero_extend_to_Pmode (rtx exp)
21128 if (GET_MODE (exp) == VOIDmode)
21129 return force_reg (Pmode, exp);
21130 if (GET_MODE (exp) == Pmode)
21131 return copy_to_mode_reg (Pmode, exp);
21132 r = gen_reg_rtx (Pmode);
21133 emit_insn (gen_zero_extendsidi2 (r, exp));
21137 /* Divide COUNTREG by SCALE. */
21139 scale_counter (rtx countreg, int scale)
21145 if (CONST_INT_P (countreg))
21146 return GEN_INT (INTVAL (countreg) / scale);
21147 gcc_assert (REG_P (countreg));
21149 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
21150 GEN_INT (exact_log2 (scale)),
21151 NULL, 1, OPTAB_DIRECT);
21155 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21156 DImode for constant loop counts. */
21158 static enum machine_mode
21159 counter_mode (rtx count_exp)
21161 if (GET_MODE (count_exp) != VOIDmode)
21162 return GET_MODE (count_exp);
21163 if (!CONST_INT_P (count_exp))
21165 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
21170 /* When SRCPTR is non-NULL, output a simple loop to move memory
21171 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
21172 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
21173 the equivalent loop to set memory by VALUE (supposed to be in MODE).
21175 The size is rounded down to a whole number of chunks moved at once.
21176 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
21180 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
21181 rtx destptr, rtx srcptr, rtx value,
21182 rtx count, enum machine_mode mode, int unroll,
21185 rtx out_label, top_label, iter, tmp;
21186 enum machine_mode iter_mode = counter_mode (count);
21187 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
21188 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
21194 top_label = gen_label_rtx ();
21195 out_label = gen_label_rtx ();
21196 iter = gen_reg_rtx (iter_mode);
21198 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
21199 NULL, 1, OPTAB_DIRECT);
21200 /* Those two should combine. */
21201 if (piece_size == const1_rtx)
21203 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
21205 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21207 emit_move_insn (iter, const0_rtx);
21209 emit_label (top_label);
21211 tmp = convert_modes (Pmode, iter_mode, iter, true);
21212 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
21213 destmem = change_address (destmem, mode, x_addr);
21217 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
21218 srcmem = change_address (srcmem, mode, y_addr);
21220 /* When unrolling for chips that reorder memory reads and writes,
21221 we can save registers by using a single temporary.
21222 Also, using 4 temporaries is overkill in 32-bit mode. */
21223 if (!TARGET_64BIT && 0)
21225 for (i = 0; i < unroll; i++)
21230 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21232 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21234 emit_move_insn (destmem, srcmem);
21240 gcc_assert (unroll <= 4);
21241 for (i = 0; i < unroll; i++)
21243 tmpreg[i] = gen_reg_rtx (mode);
21247 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21249 emit_move_insn (tmpreg[i], srcmem);
21251 for (i = 0; i < unroll; i++)
21256 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21258 emit_move_insn (destmem, tmpreg[i]);
21263 for (i = 0; i < unroll; i++)
21267 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21268 emit_move_insn (destmem, value);
21271 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
21272 true, OPTAB_LIB_WIDEN);
21274 emit_move_insn (iter, tmp);
21276 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
21278 if (expected_size != -1)
21280 expected_size /= GET_MODE_SIZE (mode) * unroll;
21281 if (expected_size == 0)
21283 else if (expected_size > REG_BR_PROB_BASE)
21284 predict_jump (REG_BR_PROB_BASE - 1);
21286 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
21289 predict_jump (REG_BR_PROB_BASE * 80 / 100);
21290 iter = ix86_zero_extend_to_Pmode (iter);
21291 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
21292 true, OPTAB_LIB_WIDEN);
21293 if (tmp != destptr)
21294 emit_move_insn (destptr, tmp);
21297 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
21298 true, OPTAB_LIB_WIDEN);
21300 emit_move_insn (srcptr, tmp);
21302 emit_label (out_label);
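/* In outline, the emitted code is equivalent to:

     size = count & -piece;          (piece = GET_MODE_SIZE (mode) * unroll)
     if (size == 0) goto out;        (guard emitted only for byte loops)
     iter = 0;
     do
       {
         copy or store piece bytes at base + iter;
         iter += piece;
       }
     while (iter < size);
     destptr += iter;                (and srcptr, for copies)
   out:;  */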
21305 /* Output a "rep; mov" instruction.
21306 Arguments have the same meaning as for the previous function. */
21308 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
21309 rtx destptr, rtx srcptr,
21311 enum machine_mode mode)
21316 HOST_WIDE_INT rounded_count;
21318 /* If the size is known, it is shorter to use rep movs. */
21319 if (mode == QImode && CONST_INT_P (count)
21320 && !(INTVAL (count) & 3))
21323 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21324 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
21325 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
21326 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
21327 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
21328 if (mode != QImode)
21330 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21331 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21332 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21333 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
21334 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21335 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
21339 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
21340 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
21342 if (CONST_INT_P (count))
21344 rounded_count = (INTVAL (count)
21345 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21346 destmem = shallow_copy_rtx (destmem);
21347 srcmem = shallow_copy_rtx (srcmem);
21348 set_mem_size (destmem, rounded_count);
21349 set_mem_size (srcmem, rounded_count);
21353 if (MEM_SIZE_KNOWN_P (destmem))
21354 clear_mem_size (destmem);
21355 if (MEM_SIZE_KNOWN_P (srcmem))
21356 clear_mem_size (srcmem);
21358 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
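/* For example, copying 64 bytes with MODE == SImode loads the count
   register with 64 >> 2 == 16 and emits a single rep movsl; DESTEXP
   and SRCEXP describe to the RTL level the final pointer values
   (base + (count << 2)) that the rep instruction leaves behind.  */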
21362 /* Output a "rep; stos" instruction.
21363 Arguments have the same meaning as for the previous function. */
21365 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
21366 rtx count, enum machine_mode mode,
21371 HOST_WIDE_INT rounded_count;
21373 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21374 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
21375 value = force_reg (mode, gen_lowpart (mode, value));
21376 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
21377 if (mode != QImode)
21379 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21380 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21381 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21384 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
21385 if (orig_value == const0_rtx && CONST_INT_P (count))
21387 rounded_count = (INTVAL (count)
21388 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21389 destmem = shallow_copy_rtx (destmem);
21390 set_mem_size (destmem, rounded_count);
21392 else if (MEM_SIZE_KNOWN_P (destmem))
21393 clear_mem_size (destmem);
21394 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
21398 emit_strmov (rtx destmem, rtx srcmem,
21399 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
21401 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
21402 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
21403 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21406 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
21408 expand_movmem_epilogue (rtx destmem, rtx srcmem,
21409 rtx destptr, rtx srcptr, rtx count, int max_size)
21412 if (CONST_INT_P (count))
21414 HOST_WIDE_INT countval = INTVAL (count);
21417 if ((countval & 0x10) && max_size > 16)
21421 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
21422 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
21425 gcc_unreachable ();
21428 if ((countval & 0x08) && max_size > 8)
21431 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
21434 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
21435 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
21439 if ((countval & 0x04) && max_size > 4)
21441 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
21444 if ((countval & 0x02) && max_size > 2)
21446 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
21449 if ((countval & 0x01) && max_size > 1)
21451 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
21458 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
21459 count, 1, OPTAB_DIRECT);
21460 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
21461 count, QImode, 1, 4);
21465 /* When single stringop insns are available, we can cheaply advance the
21466 dest and src pointers. Otherwise we save code size by maintaining an offset
21467 (zero is readily available from the preceding rep operation) and using x86 addressing modes.
21469 if (TARGET_SINGLE_STRINGOP)
21473 rtx label = ix86_expand_aligntest (count, 4, true);
21474 src = change_address (srcmem, SImode, srcptr);
21475 dest = change_address (destmem, SImode, destptr);
21476 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21477 emit_label (label);
21478 LABEL_NUSES (label) = 1;
21482 rtx label = ix86_expand_aligntest (count, 2, true);
21483 src = change_address (srcmem, HImode, srcptr);
21484 dest = change_address (destmem, HImode, destptr);
21485 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21486 emit_label (label);
21487 LABEL_NUSES (label) = 1;
21491 rtx label = ix86_expand_aligntest (count, 1, true);
21492 src = change_address (srcmem, QImode, srcptr);
21493 dest = change_address (destmem, QImode, destptr);
21494 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21495 emit_label (label);
21496 LABEL_NUSES (label) = 1;
21501 rtx offset = force_reg (Pmode, const0_rtx);
21506 rtx label = ix86_expand_aligntest (count, 4, true);
21507 src = change_address (srcmem, SImode, srcptr);
21508 dest = change_address (destmem, SImode, destptr);
21509 emit_move_insn (dest, src);
21510 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
21511 true, OPTAB_LIB_WIDEN);
21513 emit_move_insn (offset, tmp);
21514 emit_label (label);
21515 LABEL_NUSES (label) = 1;
21519 rtx label = ix86_expand_aligntest (count, 2, true);
21520 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21521 src = change_address (srcmem, HImode, tmp);
21522 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21523 dest = change_address (destmem, HImode, tmp);
21524 emit_move_insn (dest, src);
21525 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
21526 true, OPTAB_LIB_WIDEN);
21528 emit_move_insn (offset, tmp);
21529 emit_label (label);
21530 LABEL_NUSES (label) = 1;
21534 rtx label = ix86_expand_aligntest (count, 1, true);
21535 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21536 src = change_address (srcmem, QImode, tmp);
21537 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21538 dest = change_address (destmem, QImode, tmp);
21539 emit_move_insn (dest, src);
21540 emit_label (label);
21541 LABEL_NUSES (label) = 1;
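/* So for a variable count whose low bits turn out to be 7, the tests
   above copy 4, then 2, then 1 byte(s); each size is copied at most
   once, and the jump emitted by each test skips the copy whenever the
   corresponding count bit is clear.  */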
21546 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
21548 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
21549 rtx count, int max_size)
21552 expand_simple_binop (counter_mode (count), AND, count,
21553 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
21554 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
21555 gen_lowpart (QImode, value), count, QImode,
21559 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
21561 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
21565 if (CONST_INT_P (count))
21567 HOST_WIDE_INT countval = INTVAL (count);
21570 if ((countval & 0x10) && max_size > 16)
21574 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
21575 emit_insn (gen_strset (destptr, dest, value));
21576 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
21577 emit_insn (gen_strset (destptr, dest, value));
21580 gcc_unreachable ();
21583 if ((countval & 0x08) && max_size > 8)
21587 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
21588 emit_insn (gen_strset (destptr, dest, value));
21592 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
21593 emit_insn (gen_strset (destptr, dest, value));
21594 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
21595 emit_insn (gen_strset (destptr, dest, value));
21599 if ((countval & 0x04) && max_size > 4)
21601 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
21602 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
21605 if ((countval & 0x02) && max_size > 2)
21607 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
21608 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
21611 if ((countval & 0x01) && max_size > 1)
21613 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
21614 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
21621 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
21626 rtx label = ix86_expand_aligntest (count, 16, true);
21629 dest = change_address (destmem, DImode, destptr);
21630 emit_insn (gen_strset (destptr, dest, value));
21631 emit_insn (gen_strset (destptr, dest, value));
21635 dest = change_address (destmem, SImode, destptr);
21636 emit_insn (gen_strset (destptr, dest, value));
21637 emit_insn (gen_strset (destptr, dest, value));
21638 emit_insn (gen_strset (destptr, dest, value));
21639 emit_insn (gen_strset (destptr, dest, value));
21641 emit_label (label);
21642 LABEL_NUSES (label) = 1;
21646 rtx label = ix86_expand_aligntest (count, 8, true);
21649 dest = change_address (destmem, DImode, destptr);
21650 emit_insn (gen_strset (destptr, dest, value));
21654 dest = change_address (destmem, SImode, destptr);
21655 emit_insn (gen_strset (destptr, dest, value));
21656 emit_insn (gen_strset (destptr, dest, value));
21658 emit_label (label);
21659 LABEL_NUSES (label) = 1;
21663 rtx label = ix86_expand_aligntest (count, 4, true);
21664 dest = change_address (destmem, SImode, destptr);
21665 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
21666 emit_label (label);
21667 LABEL_NUSES (label) = 1;
21671 rtx label = ix86_expand_aligntest (count, 2, true);
21672 dest = change_address (destmem, HImode, destptr);
21673 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
21674 emit_label (label);
21675 LABEL_NUSES (label) = 1;
21679 rtx label = ix86_expand_aligntest (count, 1, true);
21680 dest = change_address (destmem, QImode, destptr);
21681 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
21682 emit_label (label);
21683 LABEL_NUSES (label) = 1;
21687 /* Copy enough from SRC to DEST to align DEST, which is known to be aligned
21688 to ALIGN, up to DESIRED_ALIGNMENT. */
21690 expand_movmem_prologue (rtx destmem, rtx srcmem,
21691 rtx destptr, rtx srcptr, rtx count,
21692 int align, int desired_alignment)
21694 if (align <= 1 && desired_alignment > 1)
21696 rtx label = ix86_expand_aligntest (destptr, 1, false);
21697 srcmem = change_address (srcmem, QImode, srcptr);
21698 destmem = change_address (destmem, QImode, destptr);
21699 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21700 ix86_adjust_counter (count, 1);
21701 emit_label (label);
21702 LABEL_NUSES (label) = 1;
21704 if (align <= 2 && desired_alignment > 2)
21706 rtx label = ix86_expand_aligntest (destptr, 2, false);
21707 srcmem = change_address (srcmem, HImode, srcptr);
21708 destmem = change_address (destmem, HImode, destptr);
21709 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21710 ix86_adjust_counter (count, 2);
21711 emit_label (label);
21712 LABEL_NUSES (label) = 1;
21714 if (align <= 4 && desired_alignment > 4)
21716 rtx label = ix86_expand_aligntest (destptr, 4, false);
21717 srcmem = change_address (srcmem, SImode, srcptr);
21718 destmem = change_address (destmem, SImode, destptr);
21719 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21720 ix86_adjust_counter (count, 4);
21721 emit_label (label);
21722 LABEL_NUSES (label) = 1;
21724 gcc_assert (desired_alignment <= 8);
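/* E.g. with ALIGN == 1 and DESIRED_ALIGNMENT == 8 this emits three
   guarded copies: 1 byte if DESTPTR is odd, 2 bytes if bit 1 is set,
   then 4 bytes if bit 2 is set, adjusting COUNT to match, so at most
   7 bytes are moved before the aligned main loop takes over.  */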
21727 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
21728 ALIGN_BYTES is how many bytes need to be copied. */
21730 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
21731 int desired_align, int align_bytes)
21734 rtx orig_dst = dst;
21735 rtx orig_src = src;
21737 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
21738 if (src_align_bytes >= 0)
21739 src_align_bytes = desired_align - src_align_bytes;
21740 if (align_bytes & 1)
21742 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21743 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
21745 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21747 if (align_bytes & 2)
21749 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21750 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
21751 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21752 set_mem_align (dst, 2 * BITS_PER_UNIT);
21753 if (src_align_bytes >= 0
21754 && (src_align_bytes & 1) == (align_bytes & 1)
21755 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
21756 set_mem_align (src, 2 * BITS_PER_UNIT);
21758 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21760 if (align_bytes & 4)
21762 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21763 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
21764 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21765 set_mem_align (dst, 4 * BITS_PER_UNIT);
21766 if (src_align_bytes >= 0)
21768 unsigned int src_align = 0;
21769 if ((src_align_bytes & 3) == (align_bytes & 3))
21771 else if ((src_align_bytes & 1) == (align_bytes & 1))
21773 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21774 set_mem_align (src, src_align * BITS_PER_UNIT);
21777 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21779 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21780 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
21781 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21782 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21783 if (src_align_bytes >= 0)
21785 unsigned int src_align = 0;
21786 if ((src_align_bytes & 7) == (align_bytes & 7))
21788 else if ((src_align_bytes & 3) == (align_bytes & 3))
21790 else if ((src_align_bytes & 1) == (align_bytes & 1))
21792 if (src_align > (unsigned int) desired_align)
21793 src_align = desired_align;
21794 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21795 set_mem_align (src, src_align * BITS_PER_UNIT);
21797 if (MEM_SIZE_KNOWN_P (orig_dst))
21798 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21799 if (MEM_SIZE_KNOWN_P (orig_src))
21800 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
21805 /* Store enough into DEST to align DEST, which is known to be aligned
21806 to ALIGN, up to DESIRED_ALIGNMENT. */
21808 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
21809 int align, int desired_alignment)
21811 if (align <= 1 && desired_alignment > 1)
21813 rtx label = ix86_expand_aligntest (destptr, 1, false);
21814 destmem = change_address (destmem, QImode, destptr);
21815 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
21816 ix86_adjust_counter (count, 1);
21817 emit_label (label);
21818 LABEL_NUSES (label) = 1;
21820 if (align <= 2 && desired_alignment > 2)
21822 rtx label = ix86_expand_aligntest (destptr, 2, false);
21823 destmem = change_address (destmem, HImode, destptr);
21824 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
21825 ix86_adjust_counter (count, 2);
21826 emit_label (label);
21827 LABEL_NUSES (label) = 1;
21829 if (align <= 4 && desired_alignment > 4)
21831 rtx label = ix86_expand_aligntest (destptr, 4, false);
21832 destmem = change_address (destmem, SImode, destptr);
21833 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
21834 ix86_adjust_counter (count, 4);
21835 emit_label (label);
21836 LABEL_NUSES (label) = 1;
21838 gcc_assert (desired_alignment <= 8);
21841 /* Store enough into DST to align DST, which is known to be aligned by ALIGN,
21842 to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
21844 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
21845 int desired_align, int align_bytes)
21848 rtx orig_dst = dst;
21849 if (align_bytes & 1)
21851 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21853 emit_insn (gen_strset (destreg, dst,
21854 gen_lowpart (QImode, value)));
21856 if (align_bytes & 2)
21858 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21859 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21860 set_mem_align (dst, 2 * BITS_PER_UNIT);
21862 emit_insn (gen_strset (destreg, dst,
21863 gen_lowpart (HImode, value)));
21865 if (align_bytes & 4)
21867 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21868 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21869 set_mem_align (dst, 4 * BITS_PER_UNIT);
21871 emit_insn (gen_strset (destreg, dst,
21872 gen_lowpart (SImode, value)));
21874 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21875 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21876 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21877 if (MEM_SIZE_KNOWN_P (orig_dst))
21878 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21882 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
21883 static enum stringop_alg
21884 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
21885 int *dynamic_check)
21887 const struct stringop_algs * algs;
21888 bool optimize_for_speed;
21889 /* Algorithms using the rep prefix want at least edi and ecx;
21890 additionally, memset wants eax and memcpy wants esi. Don't
21891 consider such algorithms if the user has appropriated those
21892 registers for their own purposes. */
21893 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
21895 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
21897 #define ALG_USABLE_P(alg) (rep_prefix_usable \
21898 || (alg != rep_prefix_1_byte \
21899 && alg != rep_prefix_4_byte \
21900 && alg != rep_prefix_8_byte))
21901 const struct processor_costs *cost;
21903 /* Even if the string operation call is cold, we still might spend a lot
21904 of time processing large blocks. */
21905 if (optimize_function_for_size_p (cfun)
21906 || (optimize_insn_for_size_p ()
21907 && expected_size != -1 && expected_size < 256))
21908 optimize_for_speed = false;
21910 optimize_for_speed = true;
21912 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
21914 *dynamic_check = -1;
21916 algs = &cost->memset[TARGET_64BIT != 0];
21918 algs = &cost->memcpy[TARGET_64BIT != 0];
21919 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
21920 return ix86_stringop_alg;
21921 /* rep; movq or rep; movl is the smallest variant. */
21922 else if (!optimize_for_speed)
21924 if (!count || (count & 3))
21925 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
21927 return rep_prefix_usable ? rep_prefix_4_byte : loop;
21929 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
21931 else if (expected_size != -1 && expected_size < 4)
21932 return loop_1_byte;
21933 else if (expected_size != -1)
21936 enum stringop_alg alg = libcall;
21937 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
21939 /* We get here if the algorithms that were not libcall-based
21940 were rep-prefix based and we are unable to use rep prefixes
21941 based on global register usage. Break out of the loop and
21942 use the heuristic below. */
21943 if (algs->size[i].max == 0)
21945 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
21947 enum stringop_alg candidate = algs->size[i].alg;
21949 if (candidate != libcall && ALG_USABLE_P (candidate))
21951 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
21952 last non-libcall inline algorithm. */
21953 if (TARGET_INLINE_ALL_STRINGOPS)
21955 /* When the current size is best handled by a libcall, but we are
21956 still forced to inline, run the heuristic below that will pick
21957 code for medium-sized blocks. */
21958 if (alg != libcall)
21962 else if (ALG_USABLE_P (candidate))
21966 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
21968 /* When asked to inline the call anyway, try to pick a meaningful choice.
21969 We look for the maximal size of a block that is faster to copy by hand,
21970 and take blocks of at most that size, guessing that the average size will
21971 be roughly half of the block.
21973 If this turns out to be bad, we might simply specify the preferred
21974 choice in ix86_costs. */
21975 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21976 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
21979 enum stringop_alg alg;
21981 bool any_alg_usable_p = true;
21983 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
21985 enum stringop_alg candidate = algs->size[i].alg;
21986 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
21988 if (candidate != libcall && candidate
21989 && ALG_USABLE_P (candidate))
21990 max = algs->size[i].max;
21992 /* If there aren't any usable algorithms, then recursing on
21993 smaller sizes isn't going to find anything. Just return the
21994 simple byte-at-a-time copy loop. */
21995 if (!any_alg_usable_p)
21997 /* Pick something reasonable. */
21998 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21999 *dynamic_check = 128;
22000 return loop_1_byte;
22004 alg = decide_alg (count, max / 2, memset, dynamic_check);
22005 gcc_assert (*dynamic_check == -1);
22006 gcc_assert (alg != libcall);
22007 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22008 *dynamic_check = max;
22011 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
22012 #undef ALG_USABLE_P
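/* A minimal standalone sketch of the table scan performed above
   (illustrative only: the names and the size table below are made up,
   while the real tables live in the processor cost structures).  The
   first entry whose MAX covers the expected size supplies the
   algorithm, with max == -1 acting as a catch-all.  */

enum alg_sketch { sk_libcall, sk_loop, sk_rep_prefix };

static enum alg_sketch
decide_alg_sketch (int expected_size)
{
  static const struct { int max; enum alg_sketch alg; } table[] = {
    { 64, sk_loop }, { 8192, sk_rep_prefix }, { -1, sk_libcall }
  };
  unsigned int i;

  for (i = 0; i < sizeof table / sizeof table[0]; i++)
    if (table[i].max >= expected_size || table[i].max == -1)
      return table[i].alg;
  return sk_libcall;
}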
22015 /* Decide on alignment. We know that the operand is already aligned to ALIGN
22016 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
22018 decide_alignment (int align,
22019 enum stringop_alg alg,
22022 int desired_align = 0;
22026 gcc_unreachable ();
22028 case unrolled_loop:
22029 desired_align = GET_MODE_SIZE (Pmode);
22031 case rep_prefix_8_byte:
22034 case rep_prefix_4_byte:
/* PentiumPro has special logic triggering for 8-byte-aligned blocks,
   copying a whole cache line at once.  */
22037 if (TARGET_PENTIUMPRO)
22042 case rep_prefix_1_byte:
/* PentiumPro has special logic triggering for 8-byte-aligned blocks,
   copying a whole cache line at once.  */
22045 if (TARGET_PENTIUMPRO)
22059 if (desired_align < align)
22060 desired_align = align;
22061 if (expected_size != -1 && expected_size < 4)
22062 desired_align = align;
22063 return desired_align;
22066 /* Return the smallest power of 2 greater than VAL. */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
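/* Worked example of how this helper feeds the epilogue sizing in the
   expanders below (illustrative): rounding the epilogue size up to a
   power of two is what makes the remainder computable with a single
   AND, e.g. for count = 1000 and a 16-byte chunk, the main loop covers
   992 bytes and 1000 & (16 - 1) == 8 bytes are left to the epilogue.  */

static unsigned int
epilogue_bytes_sketch (unsigned int count, unsigned int chunk_pow2)
{
  /* CHUNK_POW2 must be a power of two.  */
  return count & (chunk_pow2 - 1);
}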
22076 /* Expand string move (memcpy) operation. Use i386 string operations
22077 when profitable. expand_setmem contains similar code. The code
22078 depends upon architecture, block size and alignment, but always has
22079 the same overall structure:
1) Prologue guard: Conditional that jumps to the epilogue for small
blocks that can be handled by the epilogue alone.  This is faster
but also needed for correctness, since the prologue assumes the block
is larger than the desired alignment.
22086 Optional dynamic check for size and libcall for large
22087 blocks is emitted here too, with -minline-stringops-dynamically.
22089 2) Prologue: copy first few bytes in order to get destination
22090 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22091 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22092 copied. We emit either a jump tree on power of two sized
22093 blocks, or a byte loop.
22095 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22096 with specified algorithm.
22098 4) Epilogue: code copying tail of the block that is too small to be
22099 handled by main body (or up to size guarded by prologue guard). */
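/* The same four steps as a plain C sketch (illustrative only: the
   expander below emits RTL, and these names are made up):  */

static void
movmem_skeleton_sketch (char *dst, const char *src, unsigned int count,
                        unsigned int size_needed)
{
  unsigned int i;

  /* 1) Prologue guard: small blocks go straight to the epilogue.  */
  if (count >= size_needed)
    {
      /* 2) Prologue: copy a few bytes to align DST (omitted here).  */

      /* 3) Main body: copy SIZE_NEEDED bytes per iteration.  */
      while (count >= size_needed)
        {
          for (i = 0; i < size_needed; i++)
            dst[i] = src[i];
          dst += size_needed;
          src += size_needed;
          count -= size_needed;
        }
    }

  /* 4) Epilogue: the remaining count < SIZE_NEEDED bytes.  */
  while (count--)
    *dst++ = *src++;
}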
22102 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
22103 rtx expected_align_exp, rtx expected_size_exp)
22109 rtx jump_around_label = NULL;
22110 HOST_WIDE_INT align = 1;
22111 unsigned HOST_WIDE_INT count = 0;
22112 HOST_WIDE_INT expected_size = -1;
22113 int size_needed = 0, epilogue_size_needed;
22114 int desired_align = 0, align_bytes = 0;
22115 enum stringop_alg alg;
22117 bool need_zero_guard = false;
22119 if (CONST_INT_P (align_exp))
22120 align = INTVAL (align_exp);
/* i386 can do misaligned access at a reasonably increased cost.  */
22122 if (CONST_INT_P (expected_align_exp)
22123 && INTVAL (expected_align_exp) > align)
22124 align = INTVAL (expected_align_exp);
22125 /* ALIGN is the minimum of destination and source alignment, but we care here
22126 just about destination alignment. */
22127 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
22128 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
22130 if (CONST_INT_P (count_exp))
22131 count = expected_size = INTVAL (count_exp);
22132 if (CONST_INT_P (expected_size_exp) && count == 0)
22133 expected_size = INTVAL (expected_size_exp);
22135 /* Make sure we don't need to care about overflow later on. */
22136 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
22139 /* Step 0: Decide on preferred algorithm, desired alignment and
22140 size of chunks to be copied by main loop. */
22142 alg = decide_alg (count, expected_size, false, &dynamic_check);
22143 desired_align = decide_alignment (align, alg, expected_size);
22145 if (!TARGET_ALIGN_STRINGOPS)
22146 align = desired_align;
22148 if (alg == libcall)
22150 gcc_assert (alg != no_stringop);
22152 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
22153 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
22154 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
22159 gcc_unreachable ();
22161 need_zero_guard = true;
22162 size_needed = GET_MODE_SIZE (Pmode);
22164 case unrolled_loop:
22165 need_zero_guard = true;
22166 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
22168 case rep_prefix_8_byte:
22171 case rep_prefix_4_byte:
22174 case rep_prefix_1_byte:
22178 need_zero_guard = true;
22183 epilogue_size_needed = size_needed;
22185 /* Step 1: Prologue guard. */
22187 /* Alignment code needs count to be in register. */
22188 if (CONST_INT_P (count_exp) && desired_align > align)
22190 if (INTVAL (count_exp) > desired_align
22191 && INTVAL (count_exp) > size_needed)
align_bytes = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22195 if (align_bytes <= 0)
22198 align_bytes = desired_align - align_bytes;
22200 if (align_bytes == 0)
22201 count_exp = force_reg (counter_mode (count_exp), count_exp);
22203 gcc_assert (desired_align >= 1 && align >= 1);
22205 /* Ensure that alignment prologue won't copy past end of block. */
22206 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22208 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
/* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
   Make sure it is a power of 2.  */
22211 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
22215 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
22217 /* If main algorithm works on QImode, no epilogue is needed.
22218 For small sizes just don't align anything. */
22219 if (size_needed == 1)
22220 desired_align = align;
22227 label = gen_label_rtx ();
22228 emit_cmp_and_jump_insns (count_exp,
22229 GEN_INT (epilogue_size_needed),
22230 LTU, 0, counter_mode (count_exp), 1, label);
22231 if (expected_size == -1 || expected_size < epilogue_size_needed)
22232 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22234 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Emit code to decide at runtime whether a library call or inline code
   should be used.  */
22240 if (dynamic_check != -1)
22242 if (CONST_INT_P (count_exp))
22244 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
22246 emit_block_move_via_libcall (dst, src, count_exp, false);
22247 count_exp = const0_rtx;
22253 rtx hot_label = gen_label_rtx ();
22254 jump_around_label = gen_label_rtx ();
22255 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22256 LEU, 0, GET_MODE (count_exp), 1, hot_label);
22257 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22258 emit_block_move_via_libcall (dst, src, count_exp, false);
22259 emit_jump (jump_around_label);
22260 emit_label (hot_label);
22264 /* Step 2: Alignment prologue. */
22266 if (desired_align > align)
22268 if (align_bytes == 0)
/* Except for the first move in the epilogue, we no longer know
   constant offsets in the aliasing info.  It doesn't seem worth
   the pain to maintain it for the first move, so throw away
   the info early.  */
22274 src = change_address (src, BLKmode, srcreg);
22275 dst = change_address (dst, BLKmode, destreg);
22276 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
22281 /* If we know how many bytes need to be stored before dst is
22282 sufficiently aligned, maintain aliasing info accurately. */
22283 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
22284 desired_align, align_bytes);
22285 count_exp = plus_constant (count_exp, -align_bytes);
22286 count -= align_bytes;
22288 if (need_zero_guard
22289 && (count < (unsigned HOST_WIDE_INT) size_needed
22290 || (align_bytes == 0
22291 && count < ((unsigned HOST_WIDE_INT) size_needed
22292 + desired_align - align))))
/* It is possible that we copied enough so the main loop will not
   operate.  */
22296 gcc_assert (size_needed > 1);
22297 if (label == NULL_RTX)
22298 label = gen_label_rtx ();
22299 emit_cmp_and_jump_insns (count_exp,
22300 GEN_INT (size_needed),
22301 LTU, 0, counter_mode (count_exp), 1, label);
22302 if (expected_size == -1
22303 || expected_size < (desired_align - align) / 2 + size_needed)
22304 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22306 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22309 if (label && size_needed == 1)
22311 emit_label (label);
22312 LABEL_NUSES (label) = 1;
22314 epilogue_size_needed = 1;
22316 else if (label == NULL_RTX)
22317 epilogue_size_needed = size_needed;
22319 /* Step 3: Main loop. */
22325 gcc_unreachable ();
22327 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22328 count_exp, QImode, 1, expected_size);
22331 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22332 count_exp, Pmode, 1, expected_size);
22334 case unrolled_loop:
/* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
   registers for 4 temporaries anyway.  */
22337 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22338 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
22341 case rep_prefix_8_byte:
22342 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22345 case rep_prefix_4_byte:
22346 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22349 case rep_prefix_1_byte:
22350 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
/* Properly adjust the offsets of the source and destination memory for
   aliasing.  */
22355 if (CONST_INT_P (count_exp))
22357 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
22358 (count / size_needed) * size_needed);
22359 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
22360 (count / size_needed) * size_needed);
22364 src = change_address (src, BLKmode, srcreg);
22365 dst = change_address (dst, BLKmode, destreg);
22368 /* Step 4: Epilogue to copy the remaining bytes. */
/* When the main loop is done, COUNT_EXP might hold the original count,
   while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed.  */
22377 if (size_needed < epilogue_size_needed)
tmp = expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22381 GEN_INT (size_needed - 1), count_exp, 1,
22383 if (tmp != count_exp)
22384 emit_move_insn (count_exp, tmp);
22386 emit_label (label);
22387 LABEL_NUSES (label) = 1;
22390 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22391 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
22392 epilogue_size_needed);
22393 if (jump_around_label)
22394 emit_label (jump_around_label);
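/* Control-flow shape of the dynamic check emitted by the function above
   (illustrative; __builtin_memcpy stands in for the emitted libcall):  */

static void
dynamic_check_sketch (char *dst, const char *src, unsigned int count,
                      unsigned int dynamic_check)
{
  if (count <= dynamic_check - 1)
    {
      /* hot_label: small blocks are handled by the inline expansion.  */
      while (count--)
        *dst++ = *src++;
    }
  else
    /* Large blocks take the library call.  */
    __builtin_memcpy (dst, src, count);
  /* jump_around_label */
}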
/* Helper function for memset.  For a QImode value 0xXY, produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   VAL * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
22404 promote_duplicated_reg (enum machine_mode mode, rtx val)
22406 enum machine_mode valmode = GET_MODE (val);
22408 int nops = mode == DImode ? 3 : 2;
22410 gcc_assert (mode == SImode || mode == DImode);
22411 if (val == const0_rtx)
22412 return copy_to_mode_reg (mode, const0_rtx);
22413 if (CONST_INT_P (val))
22415 HOST_WIDE_INT v = INTVAL (val) & 255;
22419 if (mode == DImode)
22420 v |= (v << 16) << 16;
22421 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
22424 if (valmode == VOIDmode)
22426 if (valmode != QImode)
22427 val = gen_lowpart (QImode, val);
22428 if (mode == QImode)
22430 if (!TARGET_PARTIAL_REG_STALL)
22432 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
22433 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
22434 <= (ix86_cost->shift_const + ix86_cost->add) * nops
22435 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
22437 rtx reg = convert_modes (mode, QImode, val, true);
22438 tmp = promote_duplicated_reg (mode, const1_rtx);
22439 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
22444 rtx reg = convert_modes (mode, QImode, val, true);
22446 if (!TARGET_PARTIAL_REG_STALL)
22447 if (mode == SImode)
22448 emit_insn (gen_movsi_insv_1 (reg, reg));
22450 emit_insn (gen_movdi_insv_1 (reg, reg));
22453 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
22454 NULL, 1, OPTAB_DIRECT);
reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
22458 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
22459 NULL, 1, OPTAB_DIRECT);
22460 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
22461 if (mode == SImode)
22463 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
22464 NULL, 1, OPTAB_DIRECT);
22465 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
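/* The shift/or sequence above as plain C for the SImode case
   (illustrative): duplicate the byte 0xXY into 0xXYXYXYXY without a
   multiply.  */

static unsigned int
promote_byte_sketch (unsigned char val)
{
  unsigned int v = val;	/* 0x000000XY */
  v |= v << 8;		/* 0x0000XYXY */
  v |= v << 16;		/* 0xXYXYXYXY, i.e. VAL * 0x01010101 */
  return v;
}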
/* Duplicate the value VAL using promote_duplicated_reg into the maximal
   size that will be needed by the main loop copying SIZE_NEEDED chunks
   and by the prologue getting the alignment from ALIGN to DESIRED_ALIGN.  */
22474 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
22479 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
22480 promoted_val = promote_duplicated_reg (DImode, val);
22481 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
22482 promoted_val = promote_duplicated_reg (SImode, val);
22483 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
22484 promoted_val = promote_duplicated_reg (HImode, val);
22486 promoted_val = val;
22488 return promoted_val;
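/* The width selection above in plain C (illustrative): promote VAL to
   the widest width that either the main loop chunk or the alignment
   prologue will consume.  */

static int
promotion_width_sketch (int size_needed, int desired_align, int align,
                        int have_64bit)
{
  if (have_64bit
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    return 8;		/* DImode */
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    return 4;		/* SImode */
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    return 2;		/* HImode */
  else
    return 1;		/* the QImode value is used as-is */
}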
22491 /* Expand string clear operation (bzero). Use i386 string operations when
22492 profitable. See expand_movmem comment for explanation of individual
22493 steps performed. */
22495 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
22496 rtx expected_align_exp, rtx expected_size_exp)
22501 rtx jump_around_label = NULL;
22502 HOST_WIDE_INT align = 1;
22503 unsigned HOST_WIDE_INT count = 0;
22504 HOST_WIDE_INT expected_size = -1;
22505 int size_needed = 0, epilogue_size_needed;
22506 int desired_align = 0, align_bytes = 0;
22507 enum stringop_alg alg;
22508 rtx promoted_val = NULL;
22509 bool force_loopy_epilogue = false;
22511 bool need_zero_guard = false;
22513 if (CONST_INT_P (align_exp))
22514 align = INTVAL (align_exp);
/* i386 can do misaligned access at a reasonably increased cost.  */
22516 if (CONST_INT_P (expected_align_exp)
22517 && INTVAL (expected_align_exp) > align)
22518 align = INTVAL (expected_align_exp);
22519 if (CONST_INT_P (count_exp))
22520 count = expected_size = INTVAL (count_exp);
22521 if (CONST_INT_P (expected_size_exp) && count == 0)
22522 expected_size = INTVAL (expected_size_exp);
22524 /* Make sure we don't need to care about overflow later on. */
22525 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
22528 /* Step 0: Decide on preferred algorithm, desired alignment and
22529 size of chunks to be copied by main loop. */
22531 alg = decide_alg (count, expected_size, true, &dynamic_check);
22532 desired_align = decide_alignment (align, alg, expected_size);
22534 if (!TARGET_ALIGN_STRINGOPS)
22535 align = desired_align;
22537 if (alg == libcall)
22539 gcc_assert (alg != no_stringop);
22541 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
22542 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
22547 gcc_unreachable ();
22549 need_zero_guard = true;
22550 size_needed = GET_MODE_SIZE (Pmode);
22552 case unrolled_loop:
22553 need_zero_guard = true;
22554 size_needed = GET_MODE_SIZE (Pmode) * 4;
22556 case rep_prefix_8_byte:
22559 case rep_prefix_4_byte:
22562 case rep_prefix_1_byte:
22566 need_zero_guard = true;
22570 epilogue_size_needed = size_needed;
22572 /* Step 1: Prologue guard. */
22574 /* Alignment code needs count to be in register. */
22575 if (CONST_INT_P (count_exp) && desired_align > align)
22577 if (INTVAL (count_exp) > desired_align
22578 && INTVAL (count_exp) > size_needed)
align_bytes = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22582 if (align_bytes <= 0)
22585 align_bytes = desired_align - align_bytes;
22587 if (align_bytes == 0)
22589 enum machine_mode mode = SImode;
22590 if (TARGET_64BIT && (count & ~0xffffffff))
22592 count_exp = force_reg (mode, count_exp);
/* Do the cheap promotion to allow better CSE across the
   main loop and epilogue (i.e. one load of the big constant in
   front of all the code).  */
22598 if (CONST_INT_P (val_exp))
22599 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
22600 desired_align, align);
22601 /* Ensure that alignment prologue won't copy past end of block. */
22602 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22604 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
/* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
   Make sure it is a power of 2.  */
22607 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
/* To improve performance of small blocks, we jump around the VAL
   promoting code.  This means that if the promoted VAL is not constant,
   we might not use it in the epilogue and have to use the byte
   loop variant.  */
22613 if (epilogue_size_needed > 2 && !promoted_val)
22614 force_loopy_epilogue = true;
22617 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
22619 /* If main algorithm works on QImode, no epilogue is needed.
22620 For small sizes just don't align anything. */
22621 if (size_needed == 1)
22622 desired_align = align;
22629 label = gen_label_rtx ();
22630 emit_cmp_and_jump_insns (count_exp,
22631 GEN_INT (epilogue_size_needed),
22632 LTU, 0, counter_mode (count_exp), 1, label);
22633 if (expected_size == -1 || expected_size <= epilogue_size_needed)
22634 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22636 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22639 if (dynamic_check != -1)
22641 rtx hot_label = gen_label_rtx ();
22642 jump_around_label = gen_label_rtx ();
22643 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22644 LEU, 0, counter_mode (count_exp), 1, hot_label);
22645 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22646 set_storage_via_libcall (dst, count_exp, val_exp, false);
22647 emit_jump (jump_around_label);
22648 emit_label (hot_label);
22651 /* Step 2: Alignment prologue. */
/* Do the expensive promotion once we have branched off the small blocks.  */
22655 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
22656 desired_align, align);
22657 gcc_assert (desired_align >= 1 && align >= 1);
22659 if (desired_align > align)
22661 if (align_bytes == 0)
/* Except for the first move in the epilogue, we no longer know
   constant offsets in the aliasing info.  It doesn't seem worth
   the pain to maintain it for the first move, so throw away
   the info early.  */
22667 dst = change_address (dst, BLKmode, destreg);
22668 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
22673 /* If we know how many bytes need to be stored before dst is
22674 sufficiently aligned, maintain aliasing info accurately. */
22675 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
22676 desired_align, align_bytes);
22677 count_exp = plus_constant (count_exp, -align_bytes);
22678 count -= align_bytes;
22680 if (need_zero_guard
22681 && (count < (unsigned HOST_WIDE_INT) size_needed
22682 || (align_bytes == 0
22683 && count < ((unsigned HOST_WIDE_INT) size_needed
22684 + desired_align - align))))
/* It is possible that we copied enough so the main loop will not
   operate.  */
22688 gcc_assert (size_needed > 1);
22689 if (label == NULL_RTX)
22690 label = gen_label_rtx ();
22691 emit_cmp_and_jump_insns (count_exp,
22692 GEN_INT (size_needed),
22693 LTU, 0, counter_mode (count_exp), 1, label);
22694 if (expected_size == -1
22695 || expected_size < (desired_align - align) / 2 + size_needed)
22696 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22698 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22701 if (label && size_needed == 1)
22703 emit_label (label);
22704 LABEL_NUSES (label) = 1;
22706 promoted_val = val_exp;
22707 epilogue_size_needed = 1;
22709 else if (label == NULL_RTX)
22710 epilogue_size_needed = size_needed;
22712 /* Step 3: Main loop. */
22718 gcc_unreachable ();
22720 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22721 count_exp, QImode, 1, expected_size);
22724 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22725 count_exp, Pmode, 1, expected_size);
22727 case unrolled_loop:
22728 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22729 count_exp, Pmode, 4, expected_size);
22731 case rep_prefix_8_byte:
22732 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22735 case rep_prefix_4_byte:
22736 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22739 case rep_prefix_1_byte:
22740 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
/* Properly adjust the offset of the destination memory for aliasing.  */
22745 if (CONST_INT_P (count_exp))
22746 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
22747 (count / size_needed) * size_needed);
22749 dst = change_address (dst, BLKmode, destreg);
22751 /* Step 4: Epilogue to copy the remaining bytes. */
/* When the main loop is done, COUNT_EXP might hold the original count,
   while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed.  */
22760 if (size_needed < epilogue_size_needed)
tmp = expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22764 GEN_INT (size_needed - 1), count_exp, 1,
22766 if (tmp != count_exp)
22767 emit_move_insn (count_exp, tmp);
22769 emit_label (label);
22770 LABEL_NUSES (label) = 1;
22773 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22775 if (force_loopy_epilogue)
22776 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
22777 epilogue_size_needed);
22779 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
22780 epilogue_size_needed);
22782 if (jump_around_label)
22783 emit_label (jump_around_label);
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

22790 out = result, initialized with the start address
22791 align_rtx = alignment of the address.
scratch = scratch register, initialized with the start address when
not aligned, otherwise undefined
22795 This is just the body. It needs the initializations mentioned above and
22796 some address computing at the end. These things are done in i386.md. */
22799 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
22803 rtx align_2_label = NULL_RTX;
22804 rtx align_3_label = NULL_RTX;
22805 rtx align_4_label = gen_label_rtx ();
22806 rtx end_0_label = gen_label_rtx ();
22808 rtx tmpreg = gen_reg_rtx (SImode);
22809 rtx scratch = gen_reg_rtx (SImode);
22813 if (CONST_INT_P (align_rtx))
22814 align = INTVAL (align_rtx);
22816 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
22818 /* Is there a known alignment and is it less than 4? */
22821 rtx scratch1 = gen_reg_rtx (Pmode);
22822 emit_move_insn (scratch1, out);
22823 /* Is there a known alignment and is it not 2? */
22826 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
22827 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
22829 /* Leave just the 3 lower bits. */
22830 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
22831 NULL_RTX, 0, OPTAB_WIDEN);
22833 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22834 Pmode, 1, align_4_label);
22835 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
22836 Pmode, 1, align_2_label);
22837 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
22838 Pmode, 1, align_3_label);
/* Since the alignment is 2, we have to check 2 or 0 bytes;
   check whether it is aligned to 4 bytes.  */
22845 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
22846 NULL_RTX, 0, OPTAB_WIDEN);
22848 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22849 Pmode, 1, align_4_label);
22852 mem = change_address (src, QImode, out);
22854 /* Now compare the bytes. */
/* Compare the first n unaligned bytes on a byte-by-byte basis.  */
22857 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
22858 QImode, 1, end_0_label);
22860 /* Increment the address. */
22861 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22863 /* Not needed with an alignment of 2 */
22866 emit_label (align_2_label);
22868 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22871 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22873 emit_label (align_3_label);
22876 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22879 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
/* Generate the loop to check 4 bytes at a time.  It is not a good idea
   to align this loop: it only makes programs huge and does not help
   them run faster.  */
22885 emit_label (align_4_label);
22887 mem = change_address (src, SImode, out);
22888 emit_move_insn (scratch, mem);
22889 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
   This saves three branches inside the loop and many cycles.  */
22894 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
22895 emit_insn (gen_one_cmplsi2 (scratch, scratch));
22896 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
22897 emit_insn (gen_andsi3 (tmpreg, tmpreg,
22898 gen_int_mode (0x80808080, SImode)));
22899 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
22904 rtx reg = gen_reg_rtx (SImode);
22905 rtx reg2 = gen_reg_rtx (Pmode);
22906 emit_move_insn (reg, tmpreg);
22907 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
22909 /* If zero is not in the first two bytes, move two bytes forward. */
22910 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22911 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22912 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22913 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
22914 gen_rtx_IF_THEN_ELSE (SImode, tmp,
22917 /* Emit lea manually to avoid clobbering of flags. */
22918 emit_insn (gen_rtx_SET (SImode, reg2,
22919 gen_rtx_PLUS (Pmode, out, const2_rtx)));
22921 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22922 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22923 emit_insn (gen_rtx_SET (VOIDmode, out,
22924 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
22930 rtx end_2_label = gen_label_rtx ();
22931 /* Is zero in the first two bytes? */
22933 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22934 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22935 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
22936 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22937 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
22939 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
22940 JUMP_LABEL (tmp) = end_2_label;
22942 /* Not in the first two. Move two bytes forward. */
22943 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
22944 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
22946 emit_label (end_2_label);
/* Avoid a branch in fixing up the final byte.  */
22951 tmpreg = gen_lowpart (QImode, tmpreg);
22952 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
22953 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
22954 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
22955 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
22957 emit_label (end_0_label);
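/* The zero-byte test emitted above, as plain C (illustrative): the
   expression is nonzero iff some byte of V is zero.  The subtraction
   makes a byte's high bit flip on wrap-around, and ~v filters out bytes
   whose high bit was set to begin with.  */

static int
has_zero_byte_sketch (unsigned int v)
{
  return ((v - 0x01010101U) & ~v & 0x80808080U) != 0;
}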
22960 /* Expand strlen. */
22963 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
22965 rtx addr, scratch1, scratch2, scratch3, scratch4;
/* The generic case of the strlen expander is long.  Avoid expanding it
   unless TARGET_INLINE_ALL_STRINGOPS.  */
22970 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
22971 && !TARGET_INLINE_ALL_STRINGOPS
22972 && !optimize_insn_for_size_p ()
22973 && (!CONST_INT_P (align) || INTVAL (align) < 4))
22976 addr = force_reg (Pmode, XEXP (src, 0));
22977 scratch1 = gen_reg_rtx (Pmode);
22979 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
22980 && !optimize_insn_for_size_p ())
/* Well it seems that some optimizer does not combine a call like
   foo (strlen (bar), strlen (bar));
   when the move and the subtraction are done here.  It does calculate
   the length just once when these instructions are done inside of
   output_strlen_unroll ().  But I think since &bar[strlen (bar)] is
   often used and I use one fewer register for the lifetime of
   output_strlen_unroll () this is better.  */
22990 emit_move_insn (out, addr);
22992 ix86_expand_strlensi_unroll_1 (out, src, align);
22994 /* strlensi_unroll_1 returns the address of the zero at the end of
22995 the string, like memchr(), so compute the length by subtracting
22996 the start address. */
22997 emit_insn (ix86_gen_sub3 (out, out, addr));
23003 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23004 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
23007 scratch2 = gen_reg_rtx (Pmode);
23008 scratch3 = gen_reg_rtx (Pmode);
23009 scratch4 = force_reg (Pmode, constm1_rtx);
23011 emit_move_insn (scratch3, addr);
23012 eoschar = force_reg (QImode, eoschar);
23014 src = replace_equiv_address_nv (src, scratch3);
23016 /* If .md starts supporting :P, this can be done in .md. */
23017 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
23018 scratch4), UNSPEC_SCAS);
23019 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
23020 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
23021 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
/* For a given symbol (function), construct code to compute the address
   of its PLT entry in the large x86-64 PIC model.  */
23029 construct_plt_address (rtx symbol)
23031 rtx tmp = gen_reg_rtx (Pmode);
23032 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
23034 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
23035 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
23037 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
23038 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
23043 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
23045 rtx pop, bool sibcall)
/* We need to represent that SI and DI registers are clobbered
   by SYSV calls.  */
23049 static int clobbered_registers[] = {
23050 XMM6_REG, XMM7_REG, XMM8_REG,
23051 XMM9_REG, XMM10_REG, XMM11_REG,
23052 XMM12_REG, XMM13_REG, XMM14_REG,
23053 XMM15_REG, SI_REG, DI_REG
23055 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
23056 rtx use = NULL, call;
23057 unsigned int vec_len;
if (pop == const0_rtx)
  pop = NULL;
23061 gcc_assert (!TARGET_64BIT || !pop);
23063 if (TARGET_MACHO && !TARGET_64BIT)
23066 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
23067 fnaddr = machopic_indirect_call_target (fnaddr);
23072 /* Static functions and indirect calls don't need the pic register. */
23073 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
23074 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23075 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
23076 use_reg (&use, pic_offset_table_rtx);
23079 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
23081 rtx al = gen_rtx_REG (QImode, AX_REG);
23082 emit_move_insn (al, callarg2);
23083 use_reg (&use, al);
23086 if (ix86_cmodel == CM_LARGE_PIC
23088 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23089 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
23090 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
23092 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
23093 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
23095 fnaddr = XEXP (fnaddr, 0);
23096 if (GET_MODE (fnaddr) != Pmode)
23097 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
23098 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
23102 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
23104 call = gen_rtx_SET (VOIDmode, retval, call);
23105 vec[vec_len++] = call;
23109 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
23110 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
23111 vec[vec_len++] = pop;
23114 if (TARGET_64BIT_MS_ABI
23115 && (!callarg2 || INTVAL (callarg2) != -2))
23119 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
23120 UNSPEC_MS_TO_SYSV_CALL);
23122 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
23124 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
23126 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
23128 clobbered_registers[i]));
23131 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
23132 if (TARGET_VZEROUPPER)
23135 if (cfun->machine->callee_pass_avx256_p)
23137 if (cfun->machine->callee_return_avx256_p)
23138 avx256 = callee_return_pass_avx256;
23140 avx256 = callee_pass_avx256;
23142 else if (cfun->machine->callee_return_avx256_p)
23143 avx256 = callee_return_avx256;
23145 avx256 = call_no_avx256;
23147 if (reload_completed)
23148 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
23150 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
23151 gen_rtvec (1, GEN_INT (avx256)),
23152 UNSPEC_CALL_NEEDS_VZEROUPPER);
23156 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
23157 call = emit_call_insn (call);
23159 CALL_INSN_FUNCTION_USAGE (call) = use;
23165 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
23167 rtx pat = PATTERN (insn);
23168 rtvec vec = XVEC (pat, 0);
23169 int len = GET_NUM_ELEM (vec) - 1;
23171 /* Strip off the last entry of the parallel. */
23172 gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
23173 gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
23175 pat = RTVEC_ELT (vec, 0);
23177 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
23179 emit_insn (gen_avx_vzeroupper (vzeroupper));
23180 emit_call_insn (pat);
23183 /* Output the assembly for a call instruction. */
23186 ix86_output_call_insn (rtx insn, rtx call_op)
23188 bool direct_p = constant_call_address_operand (call_op, Pmode);
23189 bool seh_nop_p = false;
23192 if (SIBLING_CALL_P (insn))
23196 /* SEH epilogue detection requires the indirect branch case
23197 to include REX.W. */
23198 else if (TARGET_SEH)
23199 xasm = "rex.W jmp %A0";
23203 output_asm_insn (xasm, &call_op);
23207 /* SEH unwinding can require an extra nop to be emitted in several
23208 circumstances. Determine if we have one of those. */
23213 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
23215 /* If we get to another real insn, we don't need the nop. */
/* If we get to the epilogue note, prevent a catch region from
   being adjacent to the standard epilogue sequence.  If non-call
   exceptions are enabled, we'll have done this during epilogue emission.  */
23222 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
23223 && !flag_non_call_exceptions
23224 && !can_throw_internal (insn))
23231 /* If we didn't find a real insn following the call, prevent the
23232 unwinder from looking into the next function. */
23238 xasm = "call\t%P0";
23240 xasm = "call\t%A0";
23242 output_asm_insn (xasm, &call_op);
23250 /* Clear stack slot assignments remembered from previous functions.
This is called from INIT_EXPANDERS once before RTL is emitted for each
function.  */
23254 static struct machine_function *
23255 ix86_init_machine_status (void)
23257 struct machine_function *f;
23259 f = ggc_alloc_cleared_machine_function ();
23260 f->use_fast_prologue_epilogue_nregs = -1;
23261 f->tls_descriptor_call_expanded_p = 0;
23262 f->call_abi = ix86_abi;
23267 /* Return a MEM corresponding to a stack slot with mode MODE.
23268 Allocate a new slot if necessary.
23270 The RTL for a function can have several slots available: N is
23271 which slot to use. */
23274 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
23276 struct stack_local_entry *s;
23278 gcc_assert (n < MAX_386_STACK_LOCALS);
23280 /* Virtual slot is valid only before vregs are instantiated. */
23281 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
23283 for (s = ix86_stack_locals; s; s = s->next)
23284 if (s->mode == mode && s->n == n)
23285 return validize_mem (copy_rtx (s->rtl));
23287 s = ggc_alloc_stack_local_entry ();
23290 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
23292 s->next = ix86_stack_locals;
23293 ix86_stack_locals = s;
23294 return validize_mem (s->rtl);
23297 /* Calculate the length of the memory address in the instruction encoding.
23298 Includes addr32 prefix, does not include the one-byte modrm, opcode,
23299 or other prefixes. */
23302 memory_address_length (rtx addr)
23304 struct ix86_address parts;
23305 rtx base, index, disp;
23309 if (GET_CODE (addr) == PRE_DEC
23310 || GET_CODE (addr) == POST_INC
23311 || GET_CODE (addr) == PRE_MODIFY
23312 || GET_CODE (addr) == POST_MODIFY)
23315 ok = ix86_decompose_address (addr, &parts);
23318 if (parts.base && GET_CODE (parts.base) == SUBREG)
23319 parts.base = SUBREG_REG (parts.base);
23320 if (parts.index && GET_CODE (parts.index) == SUBREG)
23321 parts.index = SUBREG_REG (parts.index);
23324 index = parts.index;
23327 /* Add length of addr32 prefix. */
23328 len = (GET_CODE (addr) == ZERO_EXTEND
23329 || GET_CODE (addr) == AND);
/* Rule of thumb:
- esp as the base always wants an index,
23333 - ebp as the base always wants a displacement,
23334 - r12 as the base always wants an index,
23335 - r13 as the base always wants a displacement. */
23337 /* Register Indirect. */
23338 if (base && !index && !disp)
23340 /* esp (for its index) and ebp (for its displacement) need
the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
mode.  */
23344 && (addr == arg_pointer_rtx
23345 || addr == frame_pointer_rtx
23346 || REGNO (addr) == SP_REG
23347 || REGNO (addr) == BP_REG
23348 || REGNO (addr) == R12_REG
23349 || REGNO (addr) == R13_REG))
/* Direct Addressing.  In 64-bit mode mod 00 r/m 5
   is not disp32, but disp32(%rip), so for disp32 a
   SIB byte is needed, unless print_operand_address
   optimizes it into disp32(%rip) or (%rip) is implied
   by UNSPEC.  */
23358 else if (disp && !base && !index)
23365 if (GET_CODE (disp) == CONST)
23366 symbol = XEXP (disp, 0);
23367 if (GET_CODE (symbol) == PLUS
23368 && CONST_INT_P (XEXP (symbol, 1)))
23369 symbol = XEXP (symbol, 0);
23371 if (GET_CODE (symbol) != LABEL_REF
23372 && (GET_CODE (symbol) != SYMBOL_REF
23373 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
23374 && (GET_CODE (symbol) != UNSPEC
23375 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
23376 && XINT (symbol, 1) != UNSPEC_PCREL
23377 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
23384 /* Find the length of the displacement constant. */
23387 if (base && satisfies_constraint_K (disp))
23392 /* ebp always wants a displacement. Similarly r13. */
23393 else if (base && REG_P (base)
23394 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
23397 /* An index requires the two-byte modrm form.... */
23399 /* ...like esp (or r12), which always wants an index. */
23400 || base == arg_pointer_rtx
23401 || base == frame_pointer_rtx
23402 || (base && REG_P (base)
23403 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
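/* A standalone approximation of the size rules above (illustrative: the
   flag parameters stand in for the tests on PARTS.base/index/disp, and
   the addr32 and rip-relative special cases are ignored).  */

static int
addr_bytes_sketch (int base_is_sp_or_r12, int base_is_bp_or_r13,
                   int has_index, int has_disp, int disp_fits_8bit)
{
  int len = 0;		/* on top of the mandatory modrm byte */

  if (has_index || base_is_sp_or_r12)
    len += 1;		/* SIB byte */
  if (base_is_bp_or_r13 && !has_disp)
    {
      /* mod 00 with ebp/r13 as the base does not exist, so a zero disp8
	 is forced.  */
      has_disp = 1;
      disp_fits_8bit = 1;
    }
  if (has_disp)
    len += disp_fits_8bit ? 1 : 4;
  return len;
}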
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
23423 ix86_attr_length_immediate_default (rtx insn, bool shortform)
23427 extract_insn_cached (insn);
23428 for (i = recog_data.n_operands - 1; i >= 0; --i)
23429 if (CONSTANT_P (recog_data.operand[i]))
23431 enum attr_mode mode = get_attr_mode (insn);
23434 if (shortform && CONST_INT_P (recog_data.operand[i]))
23436 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
23443 ival = trunc_int_for_mode (ival, HImode);
23446 ival = trunc_int_for_mode (ival, SImode);
23451 if (IN_RANGE (ival, -128, 127))
/* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
23473 fatal_insn ("unknown insn mode", insn);
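/* The immediate-size computation in plain C (illustrative): with a
   short-form alternative, a value that fits in a signed byte after
   truncation to the operand mode costs one byte; otherwise the
   immediate costs the operand size, capped at 4 because DImode
   immediates are encoded as 32-bit sign-extended values.  */

static int
imm_bytes_sketch (long long ival, int mode_bytes, int shortform)
{
  if (shortform && ival >= -128 && ival <= 127)
    return 1;
  return mode_bytes > 4 ? 4 : mode_bytes;
}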
23478 /* Compute default value for "length_address" attribute. */
23480 ix86_attr_length_address_default (rtx insn)
23484 if (get_attr_type (insn) == TYPE_LEA)
23486 rtx set = PATTERN (insn), addr;
23488 if (GET_CODE (set) == PARALLEL)
23489 set = XVECEXP (set, 0, 0);
23491 gcc_assert (GET_CODE (set) == SET);
23493 addr = SET_SRC (set);
23494 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
23496 if (GET_CODE (addr) == ZERO_EXTEND)
23497 addr = XEXP (addr, 0);
23498 if (GET_CODE (addr) == SUBREG)
23499 addr = SUBREG_REG (addr);
23502 return memory_address_length (addr);
23505 extract_insn_cached (insn);
23506 for (i = recog_data.n_operands - 1; i >= 0; --i)
23507 if (MEM_P (recog_data.operand[i]))
23509 constrain_operands_cached (reload_completed);
23510 if (which_alternative != -1)
23512 const char *constraints = recog_data.constraints[i];
23513 int alt = which_alternative;
23515 while (*constraints == '=' || *constraints == '+')
23518 while (*constraints++ != ',')
23520 /* Skip ignored operands. */
23521 if (*constraints == 'X')
23524 return memory_address_length (XEXP (recog_data.operand[i], 0));
/* Compute the default value for the "length_vex" attribute.  It includes
   the 2- or 3-byte VEX prefix and 1 opcode byte.  */
23533 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
/* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
   requires the 3-byte VEX prefix.  */
23539 if (!has_0f_opcode || has_vex_w)
/* We can always use the 2-byte VEX prefix in 32-bit mode.  */
23546 extract_insn_cached (insn);
23548 for (i = recog_data.n_operands - 1; i >= 0; --i)
23549 if (REG_P (recog_data.operand[i]))
/* The REX.W bit requires the 3-byte VEX prefix.  */
23552 if (GET_MODE (recog_data.operand[i]) == DImode
23553 && GENERAL_REG_P (recog_data.operand[i]))
/* The REX.X or REX.B bits require the 3-byte VEX prefix.  */
23559 if (MEM_P (recog_data.operand[i])
23560 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
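/* The VEX sizing rules above as plain C (illustrative; the result
   counts the prefix plus the opcode byte, matching the attribute, and
   the REX-derived flags are always false in 32-bit mode):  */

static int
vex_bytes_sketch (int has_0f_opcode, int has_vex_w,
                  int has_64bit_gpr_operand, int mem_uses_extended_reg)
{
  if (!has_0f_opcode || has_vex_w
      || has_64bit_gpr_operand || mem_uses_extended_reg)
    return 3 + 1;	/* 3-byte VEX prefix + opcode */
  return 2 + 1;		/* 2-byte VEX prefix + opcode */
}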
23567 /* Return the maximum number of instructions a cpu can issue. */
23570 ix86_issue_rate (void)
23574 case PROCESSOR_PENTIUM:
23575 case PROCESSOR_ATOM:
23579 case PROCESSOR_PENTIUMPRO:
23580 case PROCESSOR_PENTIUM4:
23581 case PROCESSOR_CORE2_32:
23582 case PROCESSOR_CORE2_64:
23583 case PROCESSOR_COREI7_32:
23584 case PROCESSOR_COREI7_64:
23585 case PROCESSOR_ATHLON:
23587 case PROCESSOR_AMDFAM10:
23588 case PROCESSOR_NOCONA:
23589 case PROCESSOR_GENERIC32:
23590 case PROCESSOR_GENERIC64:
23591 case PROCESSOR_BDVER1:
23592 case PROCESSOR_BDVER2:
23593 case PROCESSOR_BTVER1:
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing else set by DEP_INSN.  */
23605 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
23609 /* Simplify the test for uninteresting insns. */
23610 if (insn_type != TYPE_SETCC
23611 && insn_type != TYPE_ICMOV
23612 && insn_type != TYPE_FCMOV
23613 && insn_type != TYPE_IBR)
23616 if ((set = single_set (dep_insn)) != 0)
23618 set = SET_DEST (set);
23621 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
23622 && XVECLEN (PATTERN (dep_insn), 0) == 2
23623 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
23624 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
23626 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
23632 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
23635 /* This test is true if the dependent insn reads the flags but
23636 not any other potentially set register. */
23637 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
23640 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */
23650 ix86_agi_dependent (rtx set_insn, rtx use_insn)
23653 extract_insn_cached (use_insn);
23654 for (i = recog_data.n_operands - 1; i >= 0; --i)
23655 if (MEM_P (recog_data.operand[i]))
23657 rtx addr = XEXP (recog_data.operand[i], 0);
23658 return modified_in_p (addr, set_insn) != 0;
23664 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
23666 enum attr_type insn_type, dep_insn_type;
23667 enum attr_memory memory;
23669 int dep_insn_code_number;
23671 /* Anti and output dependencies have zero cost on all CPUs. */
23672 if (REG_NOTE_KIND (link) != 0)
23675 dep_insn_code_number = recog_memoized (dep_insn);
23677 /* If we can't recognize the insns, we can't really do anything. */
23678 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
23681 insn_type = get_attr_type (insn);
23682 dep_insn_type = get_attr_type (dep_insn);
23686 case PROCESSOR_PENTIUM:
23687 /* Address Generation Interlock adds a cycle of latency. */
23688 if (insn_type == TYPE_LEA)
23690 rtx addr = PATTERN (insn);
23692 if (GET_CODE (addr) == PARALLEL)
23693 addr = XVECEXP (addr, 0, 0);
23695 gcc_assert (GET_CODE (addr) == SET);
23697 addr = SET_SRC (addr);
23698 if (modified_in_p (addr, dep_insn))
23701 else if (ix86_agi_dependent (dep_insn, insn))
23704 /* ??? Compares pair with jump/setcc. */
23705 if (ix86_flags_dependent (insn, dep_insn, insn_type))
/* Floating point stores require the value to be ready one cycle earlier.  */
23709 if (insn_type == TYPE_FMOV
23710 && get_attr_memory (insn) == MEMORY_STORE
23711 && !ix86_agi_dependent (dep_insn, insn))
23715 case PROCESSOR_PENTIUMPRO:
23716 memory = get_attr_memory (insn);
23718 /* INT->FP conversion is expensive. */
23719 if (get_attr_fp_int_src (dep_insn))
23722 /* There is one cycle extra latency between an FP op and a store. */
23723 if (insn_type == TYPE_FMOV
23724 && (set = single_set (dep_insn)) != NULL_RTX
23725 && (set2 = single_set (insn)) != NULL_RTX
23726 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
23727 && MEM_P (SET_DEST (set2)))
/* Model the ability of the reorder buffer to hide the latency of a load
   by executing it in parallel with the previous instruction when the
   previous instruction is not needed to compute the address.  */
23733 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23734 && !ix86_agi_dependent (dep_insn, insn))
/* Claim moves to take one cycle, as the core can issue one load
   at a time and the next load can start a cycle later.  */
23738 if (dep_insn_type == TYPE_IMOV
23739 || dep_insn_type == TYPE_FMOV)
23747 memory = get_attr_memory (insn);
/* The esp dependency is resolved before the instruction is really
   finished.  */
23751 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
23752 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
23755 /* INT->FP conversion is expensive. */
23756 if (get_attr_fp_int_src (dep_insn))
/* Model the ability of the reorder buffer to hide the latency of a load
   by executing it in parallel with the previous instruction when the
   previous instruction is not needed to compute the address.  */
23762 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23763 && !ix86_agi_dependent (dep_insn, insn))
/* Claim moves to take one cycle, as the core can issue one load
   at a time and the next load can start a cycle later.  */
23767 if (dep_insn_type == TYPE_IMOV
23768 || dep_insn_type == TYPE_FMOV)
23777 case PROCESSOR_ATHLON:
23779 case PROCESSOR_AMDFAM10:
23780 case PROCESSOR_BDVER1:
23781 case PROCESSOR_BDVER2:
23782 case PROCESSOR_BTVER1:
23783 case PROCESSOR_ATOM:
23784 case PROCESSOR_GENERIC32:
23785 case PROCESSOR_GENERIC64:
23786 memory = get_attr_memory (insn);
/* Model the ability of the reorder buffer to hide the latency of a load
   by executing it in parallel with the previous instruction when the
   previous instruction is not needed to compute the address.  */
23791 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23792 && !ix86_agi_dependent (dep_insn, insn))
23794 enum attr_unit unit = get_attr_unit (insn);
/* Because of the difference between the length of the integer and
   floating unit pipeline preparation stages, the memory operands
   for floating point are cheaper.

   ??? For Athlon the difference is most probably 2.  */
23802 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
23805 loadcost = TARGET_ATHLON ? 2 : 0;
23807 if (cost >= loadcost)
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
23825 ia32_multipass_dfa_lookahead (void)
23829 case PROCESSOR_PENTIUM:
23832 case PROCESSOR_PENTIUMPRO:
23836 case PROCESSOR_CORE2_32:
23837 case PROCESSOR_CORE2_64:
23838 case PROCESSOR_COREI7_32:
23839 case PROCESSOR_COREI7_64:
/* Generally, we want haifa-sched:max_issue() to look ahead as far
   as the number of instructions that can be executed in one cycle,
   i.e., issue_rate.  I wonder why tuning for many CPUs does not do this.  */
23843 return ix86_issue_rate ();
/* Model the decoder of Core 2/i7.
   The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */
23857 /* Maximum length of an insn that can be handled by
23858 a secondary decoder unit. '8' for Core 2/i7. */
23859 static int core2i7_secondary_decoder_max_insn_size;
23861 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
23862 '16' for Core 2/i7. */
23863 static int core2i7_ifetch_block_size;
23865 /* Maximum number of instructions decoder can handle per cycle.
23866 '6' for Core 2/i7. */
23867 static int core2i7_ifetch_block_max_insns;
23869 typedef struct ix86_first_cycle_multipass_data_ *
23870 ix86_first_cycle_multipass_data_t;
23871 typedef const struct ix86_first_cycle_multipass_data_ *
23872 const_ix86_first_cycle_multipass_data_t;
23874 /* A variable to store target state across calls to max_issue within
23876 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
23877 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
23879 /* Initialize DATA. */
23881 core2i7_first_cycle_multipass_init (void *_data)
23883 ix86_first_cycle_multipass_data_t data
23884 = (ix86_first_cycle_multipass_data_t) _data;
23886 data->ifetch_block_len = 0;
23887 data->ifetch_block_n_insns = 0;
23888 data->ready_try_change = NULL;
23889 data->ready_try_change_size = 0;
23892 /* Advancing the cycle; reset ifetch block counts. */
23894 core2i7_dfa_post_advance_cycle (void)
23896 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
23898 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
23900 data->ifetch_block_len = 0;
23901 data->ifetch_block_n_insns = 0;
23904 static int min_insn_size (rtx);
/* Filter out insns from ready_try that the core will not be able to issue
   on the current cycle due to decoder restrictions.  */
23909 core2i7_first_cycle_multipass_filter_ready_try
23910 (const_ix86_first_cycle_multipass_data_t data,
23911 char *ready_try, int n_ready, bool first_cycle_insn_p)
23918 if (ready_try[n_ready])
23921 insn = get_ready_element (n_ready);
23922 insn_size = min_insn_size (insn);
if (/* If this insn is too long for a secondary decoder ...  */
23925 (!first_cycle_insn_p
23926 && insn_size > core2i7_secondary_decoder_max_insn_size)
23927 /* ... or it would not fit into the ifetch block ... */
23928 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
23929 /* ... or the decoder is full already ... */
23930 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
23931 /* ... mask the insn out. */
23933 ready_try[n_ready] = 1;
23935 if (data->ready_try_change)
23936 SET_BIT (data->ready_try_change, n_ready);
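/* The masking condition above as a standalone predicate (illustrative;
   the constants are the Core 2/i7 decoder parameters set up in
   ix86_sched_init_global below).  */

static int
can_issue_sketch (int insn_size, int first_cycle_insn_p,
                  int ifetch_block_len, int ifetch_block_n_insns)
{
  if (!first_cycle_insn_p && insn_size > 8)	/* secondary decoder limit */
    return 0;
  if (ifetch_block_len + insn_size > 16)	/* ifetch block size */
    return 0;
  if (ifetch_block_n_insns + 1 > 6)		/* decoder width */
    return 0;
  return 1;
}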
23941 /* Prepare for a new round of multipass lookahead scheduling. */
23943 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
23944 bool first_cycle_insn_p)
23946 ix86_first_cycle_multipass_data_t data
23947 = (ix86_first_cycle_multipass_data_t) _data;
23948 const_ix86_first_cycle_multipass_data_t prev_data
23949 = ix86_first_cycle_multipass_data;
23951 /* Restore the state from the end of the previous round. */
23952 data->ifetch_block_len = prev_data->ifetch_block_len;
23953 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
23955 /* Filter instructions that cannot be issued on current cycle due to
23956 decoder restrictions. */
23957 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
23958 first_cycle_insn_p);
23961 /* INSN is being issued in current solution. Account for its impact on
23962 the decoder model. */
23964 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
23965 rtx insn, const void *_prev_data)
23967 ix86_first_cycle_multipass_data_t data
23968 = (ix86_first_cycle_multipass_data_t) _data;
23969 const_ix86_first_cycle_multipass_data_t prev_data
23970 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
23972 int insn_size = min_insn_size (insn);
23974 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
23975 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
23976 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
23977 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
23979 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
23980 if (!data->ready_try_change)
23982 data->ready_try_change = sbitmap_alloc (n_ready);
23983 data->ready_try_change_size = n_ready;
23985 else if (data->ready_try_change_size < n_ready)
23987 data->ready_try_change = sbitmap_resize (data->ready_try_change,
23989 data->ready_try_change_size = n_ready;
23991 sbitmap_zero (data->ready_try_change);
/* Filter out insns from ready_try that the core will not be able to issue
   on the current cycle due to decoder restrictions.  */
23995 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
23999 /* Revert the effect on ready_try. */
24001 core2i7_first_cycle_multipass_backtrack (const void *_data,
24003 int n_ready ATTRIBUTE_UNUSED)
24005 const_ix86_first_cycle_multipass_data_t data
24006 = (const_ix86_first_cycle_multipass_data_t) _data;
24007 unsigned int i = 0;
24008 sbitmap_iterator sbi;
24010 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
24011 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
24017 /* Save the result of multipass lookahead scheduling for the next round. */
24019 core2i7_first_cycle_multipass_end (const void *_data)
24021 const_ix86_first_cycle_multipass_data_t data
24022 = (const_ix86_first_cycle_multipass_data_t) _data;
24023 ix86_first_cycle_multipass_data_t next_data
24024 = ix86_first_cycle_multipass_data;
24028 next_data->ifetch_block_len = data->ifetch_block_len;
24029 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
24033 /* Deallocate target data. */
24035 core2i7_first_cycle_multipass_fini (void *_data)
24037 ix86_first_cycle_multipass_data_t data
24038 = (ix86_first_cycle_multipass_data_t) _data;
24040 if (data->ready_try_change)
24042 sbitmap_free (data->ready_try_change);
24043 data->ready_try_change = NULL;
24044 data->ready_try_change_size = 0;
24048 /* Prepare for scheduling pass. */
24050 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
24051 int verbose ATTRIBUTE_UNUSED,
24052 int max_uid ATTRIBUTE_UNUSED)
24054 /* Install scheduling hooks for current CPU. Some of these hooks are used
24055 in time-critical parts of the scheduler, so we only set them up when
24056 they are actually used. */
24059 case PROCESSOR_CORE2_32:
24060 case PROCESSOR_CORE2_64:
24061 case PROCESSOR_COREI7_32:
24062 case PROCESSOR_COREI7_64:
24063 targetm.sched.dfa_post_advance_cycle
24064 = core2i7_dfa_post_advance_cycle;
24065 targetm.sched.first_cycle_multipass_init
24066 = core2i7_first_cycle_multipass_init;
24067 targetm.sched.first_cycle_multipass_begin
24068 = core2i7_first_cycle_multipass_begin;
24069 targetm.sched.first_cycle_multipass_issue
24070 = core2i7_first_cycle_multipass_issue;
24071 targetm.sched.first_cycle_multipass_backtrack
24072 = core2i7_first_cycle_multipass_backtrack;
24073 targetm.sched.first_cycle_multipass_end
24074 = core2i7_first_cycle_multipass_end;
24075 targetm.sched.first_cycle_multipass_fini
24076 = core2i7_first_cycle_multipass_fini;
24078 /* Set decoder parameters. */
24079 core2i7_secondary_decoder_max_insn_size = 8;
24080 core2i7_ifetch_block_size = 16;
24081 core2i7_ifetch_block_max_insns = 6;
24085 targetm.sched.dfa_post_advance_cycle = NULL;
24086 targetm.sched.first_cycle_multipass_init = NULL;
24087 targetm.sched.first_cycle_multipass_begin = NULL;
24088 targetm.sched.first_cycle_multipass_issue = NULL;
24089 targetm.sched.first_cycle_multipass_backtrack = NULL;
24090 targetm.sched.first_cycle_multipass_end = NULL;
24091 targetm.sched.first_cycle_multipass_fini = NULL;
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */
24104 ix86_constant_alignment (tree exp, int align)
24106 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
24107 || TREE_CODE (exp) == INTEGER_CST)
24109 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
24111 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
24114 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
24115 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
24116 return BITS_PER_WORD;
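/* Illustrative sketch, not part of GCC: how the hook above plays out for a
   couple of file-scope constants (the exact directives depend on the
   assembler and target defaults).  */
#if 0
static const double d = 2.718281828459045;
/* REAL_CST in DFmode: on ia32 the ordinary alignment would be 32 bits,
   so the hook bumps the constant pool slot to 64 bits (".align 8").  */
static const char msg[] = "a string constant of thirty-one+ bytes";
/* STRING_CST of length >= 31 with !optimize_size: aligned up to
   BITS_PER_WORD so word-sized block moves can operate on it.  */
#endif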
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
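/* Illustrative sketch, not part of GCC: the effect of ix86_data_alignment
   on a few file-scope objects when compiling for x86-64.  */
#if 0
static char buf[64];    /* aggregate of >= 16 bytes: aligned to 16 bytes,
			   so movaps/movdqa may be used on it.  */
static char tiny[8];    /* below the ABI threshold: keeps its ordinary
			   alignment.  */
static double darr[4];  /* ARRAY_TYPE of DFmode elements: gets at least
			   64-bit alignment even on ia32.  */
#endif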
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
		      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  Exact wording is:

       An array uses the same alignment as its elements, except that a local
       or global array variable of length at least 16 bytes or a C99
       variable-length array variable always has alignment of at least 16
       bytes.

     This was added to allow use of aligned SSE instructions on arrays.  The
     rule is meant for static storage (where the compiler cannot do the
     analysis by itself).  We follow it for automatic variables only when
     convenient.  We fully control everything in the function being compiled,
     and functions from other units cannot rely on the alignment.

     Exclude va_list type.  It is the common case of a local array where
     we cannot benefit from the alignment.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
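/* Illustrative sketch, not part of GCC: with -m32
   -mpreferred-stack-boundary=2 the stack only guarantees 4-byte alignment,
   so an 8-byte-aligned `long long' local would otherwise force dynamic
   realignment of the whole frame.  The two hooks above instead let such a
   DImode object live at 32-bit alignment.  */
#if 0
long long
f (long long x)
{
  long long tmp = x + 1;  /* DImode local: placed at 4-byte alignment, so
			     the frame need not be dynamically realigned.  */
  return tmp;
}
#endif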
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl)
		ix86_static_chain_on_stack = true;
	      return gen_frame_mem (SImode,
				    plus_constant (arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
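/* Illustrative sketch, not part of GCC: a GNU C nested function whose
   enclosing frame address travels in the static chain register chosen
   above (ECX by default on ia32, R10 on x86-64, EAX for fastcall or
   thiscall functions).  */
#if 0
int
outer (int n)
{
  int acc = 0;
  void add (int i) { acc += i; }  /* `add' receives outer's frame address
				     in the static chain register.  */
  add (n);
  return acc;
}
#endif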
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);

	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the
	 shorter movl instead of movabs for x32.  */
      if (TARGET_X32)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (XEXP (m_tramp, 0),
					  offset - (MEM_P (chain) ? 1 : 0)),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
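/* For reference (decoded from the constants stored above): the 64-bit
   trampoline assembled by ix86_trampoline_init has this byte layout,
   shown in memory order; the movl forms replace the REX.W prefix and
   use a 4-byte immediate:

     49 bb <imm64>	movabs $fnaddr, %r11	(or 41 bb <imm32>, movl)
     49 ba <imm64>	movabs $chain,  %r10	(or 41 ba <imm32> on x32)
     49 ff e3		jmp    *%r11
     90			nop	(pads the final store to 32 bits)

   The ia32 variant is a mov-immediate into EAX/ECX (or a push, opcode
   0x68) followed by e9 <rel32>, a relative jmp to the target.  */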
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
	quals = TYPE_UNQUALIFIED;
      else
	quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
	itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
	{
	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
	  args = tree_cons (NULL, atype, args);
	}

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PANDN,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,

  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,

  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTI64,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,
  IX86_BUILTIN_PAUSE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,

  IX86_BUILTIN_FLOORPD,
  IX86_BUILTIN_CEILPD,
  IX86_BUILTIN_TRUNCPD,
  IX86_BUILTIN_RINTPD,
  IX86_BUILTIN_ROUNDPD_AZ,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,

  IX86_BUILTIN_FLOORPS,
  IX86_BUILTIN_CEILPS,
  IX86_BUILTIN_TRUNCPS,
  IX86_BUILTIN_RINTPS,
  IX86_BUILTIN_ROUNDPS_AZ,

  IX86_BUILTIN_FLOORPS_SFIX,
  IX86_BUILTIN_CEILPS_SFIX,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,
  IX86_BUILTIN_VEC_PACK_SFIX256,

  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_ROUNDPD_AZ256,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,

  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,
  IX86_BUILTIN_ROUNDPS_AZ256,

  IX86_BUILTIN_FLOORPS_SFIX256,
  IX86_BUILTIN_CEILPS_SFIX256,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  IX86_BUILTIN_MPSADBW256,
  IX86_BUILTIN_PABSB256,
  IX86_BUILTIN_PABSW256,
  IX86_BUILTIN_PABSD256,
  IX86_BUILTIN_PACKSSDW256,
  IX86_BUILTIN_PACKSSWB256,
  IX86_BUILTIN_PACKUSDW256,
  IX86_BUILTIN_PACKUSWB256,
  IX86_BUILTIN_PADDB256,
  IX86_BUILTIN_PADDW256,
  IX86_BUILTIN_PADDD256,
  IX86_BUILTIN_PADDQ256,
  IX86_BUILTIN_PADDSB256,
  IX86_BUILTIN_PADDSW256,
  IX86_BUILTIN_PADDUSB256,
  IX86_BUILTIN_PADDUSW256,
  IX86_BUILTIN_PALIGNR256,
  IX86_BUILTIN_AND256I,
  IX86_BUILTIN_ANDNOT256I,
  IX86_BUILTIN_PAVGB256,
  IX86_BUILTIN_PAVGW256,
  IX86_BUILTIN_PBLENDVB256,
  IX86_BUILTIN_PBLENDVW256,
  IX86_BUILTIN_PCMPEQB256,
  IX86_BUILTIN_PCMPEQW256,
  IX86_BUILTIN_PCMPEQD256,
  IX86_BUILTIN_PCMPEQQ256,
  IX86_BUILTIN_PCMPGTB256,
  IX86_BUILTIN_PCMPGTW256,
  IX86_BUILTIN_PCMPGTD256,
  IX86_BUILTIN_PCMPGTQ256,
  IX86_BUILTIN_PHADDW256,
  IX86_BUILTIN_PHADDD256,
  IX86_BUILTIN_PHADDSW256,
  IX86_BUILTIN_PHSUBW256,
  IX86_BUILTIN_PHSUBD256,
  IX86_BUILTIN_PHSUBSW256,
  IX86_BUILTIN_PMADDUBSW256,
  IX86_BUILTIN_PMADDWD256,
  IX86_BUILTIN_PMAXSB256,
  IX86_BUILTIN_PMAXSW256,
  IX86_BUILTIN_PMAXSD256,
  IX86_BUILTIN_PMAXUB256,
  IX86_BUILTIN_PMAXUW256,
  IX86_BUILTIN_PMAXUD256,
  IX86_BUILTIN_PMINSB256,
  IX86_BUILTIN_PMINSW256,
  IX86_BUILTIN_PMINSD256,
  IX86_BUILTIN_PMINUB256,
  IX86_BUILTIN_PMINUW256,
  IX86_BUILTIN_PMINUD256,
  IX86_BUILTIN_PMOVMSKB256,
  IX86_BUILTIN_PMOVSXBW256,
  IX86_BUILTIN_PMOVSXBD256,
  IX86_BUILTIN_PMOVSXBQ256,
  IX86_BUILTIN_PMOVSXWD256,
  IX86_BUILTIN_PMOVSXWQ256,
  IX86_BUILTIN_PMOVSXDQ256,
  IX86_BUILTIN_PMOVZXBW256,
  IX86_BUILTIN_PMOVZXBD256,
  IX86_BUILTIN_PMOVZXBQ256,
  IX86_BUILTIN_PMOVZXWD256,
  IX86_BUILTIN_PMOVZXWQ256,
  IX86_BUILTIN_PMOVZXDQ256,
  IX86_BUILTIN_PMULDQ256,
  IX86_BUILTIN_PMULHRSW256,
  IX86_BUILTIN_PMULHUW256,
  IX86_BUILTIN_PMULHW256,
  IX86_BUILTIN_PMULLW256,
  IX86_BUILTIN_PMULLD256,
  IX86_BUILTIN_PMULUDQ256,
  IX86_BUILTIN_POR256,
  IX86_BUILTIN_PSADBW256,
  IX86_BUILTIN_PSHUFB256,
  IX86_BUILTIN_PSHUFD256,
  IX86_BUILTIN_PSHUFHW256,
  IX86_BUILTIN_PSHUFLW256,
  IX86_BUILTIN_PSIGNB256,
  IX86_BUILTIN_PSIGNW256,
  IX86_BUILTIN_PSIGND256,
  IX86_BUILTIN_PSLLDQI256,
  IX86_BUILTIN_PSLLWI256,
  IX86_BUILTIN_PSLLW256,
  IX86_BUILTIN_PSLLDI256,
  IX86_BUILTIN_PSLLD256,
  IX86_BUILTIN_PSLLQI256,
  IX86_BUILTIN_PSLLQ256,
  IX86_BUILTIN_PSRAWI256,
  IX86_BUILTIN_PSRAW256,
  IX86_BUILTIN_PSRADI256,
  IX86_BUILTIN_PSRAD256,
  IX86_BUILTIN_PSRLDQI256,
  IX86_BUILTIN_PSRLWI256,
  IX86_BUILTIN_PSRLW256,
  IX86_BUILTIN_PSRLDI256,
  IX86_BUILTIN_PSRLD256,
  IX86_BUILTIN_PSRLQI256,
  IX86_BUILTIN_PSRLQ256,
  IX86_BUILTIN_PSUBB256,
  IX86_BUILTIN_PSUBW256,
  IX86_BUILTIN_PSUBD256,
  IX86_BUILTIN_PSUBQ256,
  IX86_BUILTIN_PSUBSB256,
  IX86_BUILTIN_PSUBSW256,
  IX86_BUILTIN_PSUBUSB256,
  IX86_BUILTIN_PSUBUSW256,
  IX86_BUILTIN_PUNPCKHBW256,
  IX86_BUILTIN_PUNPCKHWD256,
  IX86_BUILTIN_PUNPCKHDQ256,
  IX86_BUILTIN_PUNPCKHQDQ256,
  IX86_BUILTIN_PUNPCKLBW256,
  IX86_BUILTIN_PUNPCKLWD256,
  IX86_BUILTIN_PUNPCKLDQ256,
  IX86_BUILTIN_PUNPCKLQDQ256,
  IX86_BUILTIN_PXOR256,
  IX86_BUILTIN_MOVNTDQA256,
  IX86_BUILTIN_VBROADCASTSS_PS,
  IX86_BUILTIN_VBROADCASTSS_PS256,
  IX86_BUILTIN_VBROADCASTSD_PD256,
  IX86_BUILTIN_VBROADCASTSI256,
  IX86_BUILTIN_PBLENDD256,
  IX86_BUILTIN_PBLENDD128,
  IX86_BUILTIN_PBROADCASTB256,
  IX86_BUILTIN_PBROADCASTW256,
  IX86_BUILTIN_PBROADCASTD256,
  IX86_BUILTIN_PBROADCASTQ256,
  IX86_BUILTIN_PBROADCASTB128,
  IX86_BUILTIN_PBROADCASTW128,
  IX86_BUILTIN_PBROADCASTD128,
  IX86_BUILTIN_PBROADCASTQ128,
  IX86_BUILTIN_VPERMVARSI256,
  IX86_BUILTIN_VPERMDF256,
  IX86_BUILTIN_VPERMVARSF256,
  IX86_BUILTIN_VPERMDI256,
  IX86_BUILTIN_VPERMTI256,
  IX86_BUILTIN_VEXTRACT128I256,
  IX86_BUILTIN_VINSERT128I256,
  IX86_BUILTIN_MASKLOADD,
  IX86_BUILTIN_MASKLOADQ,
  IX86_BUILTIN_MASKLOADD256,
  IX86_BUILTIN_MASKLOADQ256,
  IX86_BUILTIN_MASKSTORED,
  IX86_BUILTIN_MASKSTOREQ,
  IX86_BUILTIN_MASKSTORED256,
  IX86_BUILTIN_MASKSTOREQ256,
  IX86_BUILTIN_PSLLVV4DI,
  IX86_BUILTIN_PSLLVV2DI,
  IX86_BUILTIN_PSLLVV8SI,
  IX86_BUILTIN_PSLLVV4SI,
  IX86_BUILTIN_PSRAVV8SI,
  IX86_BUILTIN_PSRAVV4SI,
  IX86_BUILTIN_PSRLVV4DI,
  IX86_BUILTIN_PSRLVV2DI,
  IX86_BUILTIN_PSRLVV8SI,
  IX86_BUILTIN_PSRLVV4SI,

  IX86_BUILTIN_GATHERSIV2DF,
  IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF,
  IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF,
  IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF,
  IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI,
  IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI,
  IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI,
  IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI,
  IX86_BUILTIN_GATHERDIV8SI,

  /* Alternate 4 element gather for the vectorizer where
     all operands are 32-byte wide.  */
  IX86_BUILTIN_GATHERALTSIV4DF,
  IX86_BUILTIN_GATHERALTDIV8SF,
  IX86_BUILTIN_GATHERALTSIV4DI,
  IX86_BUILTIN_GATHERALTDIV8SI,

  /* TFmode support builtins.  */

  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  /* FMA4 instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  /* FMA3 instructions.  */
  IX86_BUILTIN_VFMADDSS3,
  IX86_BUILTIN_VFMADDSD3,

  /* XOP instructions.  */
  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* BMI2 instructions.  */
  IX86_BUILTIN_BZHI32,
  IX86_BUILTIN_BZHI64,
  IX86_BUILTIN_PDEP32,
  IX86_BUILTIN_PDEP64,
  IX86_BUILTIN_PEXT32,
  IX86_BUILTIN_PEXT64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,
  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;		     /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  HOST_WIDE_INT isa;		     /* isa_flags this builtin is defined for */
  bool const_p;			     /* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   they aren't in the current ISA, in case the user uses function specific
   options for a different ISA, so that we don't get scope errors if a builtin
   is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
	     enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
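/* Illustrative sketch, not part of GCC: the deferred-builtin flow that
   def_builtin and ix86_add_new_builtins implement together.  The entry is
   modeled on the real crc32 builtin; the exact FTYPE macro name is
   generated from i386-builtin-types.def, so treat it as an assumption.  */
#if 0
  /* With SSE4.2 disabled at startup, this only records the builtin in
     ix86_builtins_isa[] and leaves ix86_builtins[] as NULL_TREE:  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si",
		     UINT_FTYPE_UINT_UINT, IX86_BUILTIN_CRC32SI);

  /* Later, when a target attribute or pragma turns the ISA on, the
     pending declaration is built for real:  */
  ix86_add_new_builtins (OPTION_MASK_ISA_SSE4_2);
#endif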
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
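/* Illustrative note on BUILTIN_DESC_SWAP_OPERANDS, not from the sources:
   SSE's cmpps/cmpsd predicates only cover eq/lt/le/unord and their
   negations, so a "greater than" comparison has no direct encoding.  An
   entry carrying this flag is expanded by swapping the operands and using
   the mirrored predicate instead, e.g.

       a > b   ==>   cmpltps  b, a  */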
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
25975 /* Special builtins with variable number of arguments. */
25976 static const struct builtin_description bdesc_special_args[] =
25978 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
25979 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
25980 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
25983 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
25986 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
25989 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25990 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25991 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
25993 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
25994 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
25995 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
25996 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
25998 /* SSE or 3DNow!A */
25999 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26000 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
26003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
26006 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
26007 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
26008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
26009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
26010 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
26011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
26012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
26014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
26015 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
26018 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
26021 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
26024 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
26025 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};

/* Builtins with variable number of arguments. */
static const struct builtin_description bdesc_args[] =
{
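  /* Roughly, each entry supplies: the OPTION_MASK_ISA_* bits that must
     be enabled for the builtin to be defined, the insn code used to
     expand it, the user-visible name (0 for builtins with no direct
     user entry point), the IX86_BUILTIN_* code, the rtx comparison
     code used by the compare builtins (UNKNOWN elsewhere), and the
     encoded function prototype.  For instance, given the MMX entry for
     "__builtin_ia32_paddb" below, user code such as

       __v8qi sum = __builtin_ia32_paddb (a, b);

     is expanded through the CODE_FOR_mmx_addv8qi3 pattern.  */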
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
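
  /* Note the pairing above: the *_SI_COUNT variants take the shift
     count as a scalar integer, while the *_V4HI_COUNT, *_V2SI_COUNT
     and *_V1DI_COUNT variants take it as the low part of an MMX
     register; both expand through the same shift pattern.  */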

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
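
  /* The *_VEC_MERGE type marks the vm* (scalar-merge) patterns used
     just above: the operation is applied to element 0 only and the
     upper elements of the result are taken from the input operand,
     matching the behavior of sqrtss/rsqrtss/rcpss.  */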

  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
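
  /* The *_INT_CONVERT types appear to mark builtins declared with
     V2DI/V1DI operands that expand through patterns of another mode
     (palignrti/palignrdi here, and the V1TImode byte shifts earlier),
     so the operands are converted between modes when the insn is
     generated.  */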

/* SSE4.1 */
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

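/* Editor-added note: in the floor/ceil/trunc/rint descriptors below the
   comparison-code slot is reused to smuggle a ROUND_* rounding-mode
   constant through to the expander (hence the casts to enum rtx_code),
   and the function type carries a matching _ROUND suffix.  */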
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },

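/* Editor-added note: the PTEST descriptors encode the flags test in the
   comparison slot: EQ selects the ZF test (ptestz), LTU the CF test
   (ptestc), and GTU the "neither ZF nor CF" test (ptestnzc).  */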
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },

/* SSE4.2 */
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

/* SSE4A */
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

/* AES */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

/* PCLMUL */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

/* AVX */
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

/* AVX2 */
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

/* BMI */
{ OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
{ OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

/* TBM */
{ OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

/* F16C */
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },

/* BMI2 */
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};

/* FMA4 and XOP. */
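/* Editor-added note: the MULTI_ARG_* names below are shorthand for the
   ix86 function-type enumerators used by the FMA4/XOP descriptors.  The
   suffix appears to encode operand count and vector element type
   (SF/DF/SI/HI/QI), with a trailing 2 marking the 256-bit form, _IMM an
   immediate operand, and _CMP/_TF forms that carry a comparison.  */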
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
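
/* Editor-added note: each entry below follows the same layout as the
   other bdesc tables: { enabling ISA mask, insn code, builtin name,
   builtin enumerator, comparison/rounding code (UNKNOWN when unused),
   function type }.  */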
static const struct builtin_description bdesc_multi_arg[] =
{
26975 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
26976 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
26977 UNKNOWN, (int)MULTI_ARG_3_SF },
26978 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
26979 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
26980 UNKNOWN, (int)MULTI_ARG_3_DF },
26982 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
26983 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
26984 UNKNOWN, (int)MULTI_ARG_3_SF },
26985 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
26986 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
26987 UNKNOWN, (int)MULTI_ARG_3_DF },
26989 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
26990 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
26991 UNKNOWN, (int)MULTI_ARG_3_SF },
26992 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
26993 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
26994 UNKNOWN, (int)MULTI_ARG_3_DF },
26995 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
26996 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
26997 UNKNOWN, (int)MULTI_ARG_3_SF2 },
26998 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
26999 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
27000 UNKNOWN, (int)MULTI_ARG_3_DF2 },
27002 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
27003 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
27004 UNKNOWN, (int)MULTI_ARG_3_SF },
27005 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
27006 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
27007 UNKNOWN, (int)MULTI_ARG_3_DF },
27008 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
27009 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
27010 UNKNOWN, (int)MULTI_ARG_3_SF2 },
27011 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
27012 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
27013 UNKNOWN, (int)MULTI_ARG_3_DF2 },
27015 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
27016 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
27017 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
27018 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
27019 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
27020 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
27021 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
27023 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
27024 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
27025 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
27026 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
27027 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
27028 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
27029 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
27031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
27033 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
27034 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
27035 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
27036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
27037 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
27038 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
27039 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27040 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27041 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27042 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
27044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
27046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
27047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
27048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
27049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
27050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
27051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
27052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
27053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
27054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
27055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
27056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
27057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
27058 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
27059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
27060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
27061 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
27063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
27064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
27065 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
27066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
27067 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
27068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
27070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
27071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
27072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
27073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
27074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
27075 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
27076 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
27077 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
27078 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
27079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
27080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
27081 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
27082 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
27083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
27084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
27086 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
27087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
27088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
27089 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
27090 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
27091 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
27092 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
27094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
27095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
27096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
27097 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
27098 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
27099 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
27100 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
27102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
27103 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
27104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
27105 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
27106 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
27107 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
27108 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
27110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
27111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
27112 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
27113 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
27114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
27115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
27116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
27118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
27119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
27120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
27121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
27122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
27123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
27124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
27126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
27127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
27128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
27129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
27130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
27131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
27132 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
27134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
27135 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
27136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
27137 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
27138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
27139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
27140 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
27142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
27143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
27144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
27145 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
27146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
27147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
27148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
27150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
27151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
27152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
27153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
27154 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
27155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
27156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
27157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
27159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
27160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
27161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
27162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
27163 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
27164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
27165 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
27166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
27168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
27169 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
27170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
27171 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
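/* Illustrative use of the table above (target code, not part of GCC): with
   -mxop, the xopintrin.h wrappers reach these entries.  For example the
   byte rotate

       #include <x86intrin.h>
       __m128i
       rotate_bytes (__m128i x, __m128i counts)
       {
         return _mm_rot_epi8 (x, counts);
       }

   expands to __builtin_ia32_vprotb from the vprot* rows.  The intrinsic
   name is quoted from memory of xopintrin.h, so treat it as an assumption
   and check the header for the authoritative spelling.  */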
27175 /* TM vector builtins. */
27177 /* Reuse the existing x86-specific `struct builtin_description' because
27178 we're lazy.  Add casts to make them fit.  */
27179 static const struct builtin_description bdesc_tm[] =
27181 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
27182 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
27183 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
27184 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
27185 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
27186 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
27187 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
27189 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
27190 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
27191 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
27192 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
27193 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
27194 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
27195 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
27197 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
27198 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
27199 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
27200 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
27201 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
27202 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
27203 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
27205 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
27206 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
27207 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
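/* Sketch of how the bdesc_tm entries are reached (an illustration based on
   the trans-mem design, not code in this file): inside a
   __transaction_atomic block the TM lowering pass rewrites instrumented
   vector memory accesses through these entry points, so a 128-bit vector
   store becomes a call to __builtin__ITM_WM128, which the libitm runtime
   provides as _ITM_WM128.  */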
27210 /* TM callbacks. */
27212 /* Return the builtin decl needed to load a vector of TYPE. */
27215 ix86_builtin_tm_load (tree type)
27217 if (TREE_CODE (type) == VECTOR_TYPE)
27219 switch (tree_low_cst (TYPE_SIZE (type), 1))
27222 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
27224 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
27226 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
27232 /* Return the builtin decl needed to store a vector of TYPE. */
27235 ix86_builtin_tm_store (tree type)
27237 if (TREE_CODE (type) == VECTOR_TYPE)
27239 switch (tree_low_cst (TYPE_SIZE (type), 1))
27242 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
27244 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
27246 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
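/* Worked example for the two callbacks above (illustrative): for

       typedef float v4sf __attribute__ ((vector_size (16)));

   TYPE_SIZE is 128 bits, so the switches select the BUILT_IN_TM_LOAD_M128
   and BUILT_IN_TM_STORE_M128 decls registered from bdesc_tm; vector sizes
   with no matching case yield no decl.  */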
27252 /* Initialize the transactional memory vector load/store builtins. */
27255 ix86_init_tm_builtins (void)
27257 enum ix86_builtin_func_type ftype;
27258 const struct builtin_description *d;
27261 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
27262 tree attrs_log, attrs_type_log;
27267 /* If there are no builtins defined, we must be compiling in a
27268 language without trans-mem support. */
27269 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
27272 /* Use whatever attributes a normal TM load has. */
27273 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
27274 attrs_load = DECL_ATTRIBUTES (decl);
27275 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
27276 /* Use whatever attributes a normal TM store has. */
27277 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
27278 attrs_store = DECL_ATTRIBUTES (decl);
27279 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
27280 /* Use whatever attributes a normal TM log has. */
27281 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
27282 attrs_log = DECL_ATTRIBUTES (decl);
27283 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
27285 for (i = 0, d = bdesc_tm;
27286 i < ARRAY_SIZE (bdesc_tm);
27289 if ((d->mask & ix86_isa_flags) != 0
27290 || (lang_hooks.builtin_function
27291 == lang_hooks.builtin_function_ext_scope))
27293 tree type, attrs, attrs_type;
27294 enum built_in_function code = (enum built_in_function) d->code;
27296 ftype = (enum ix86_builtin_func_type) d->flag;
27297 type = ix86_get_builtin_func_type (ftype);
27299 if (BUILTIN_TM_LOAD_P (code))
27301 attrs = attrs_load;
27302 attrs_type = attrs_type_load;
27304 else if (BUILTIN_TM_STORE_P (code))
27306 attrs = attrs_store;
27307 attrs_type = attrs_type_store;
27312 attrs_type = attrs_type_log;
27314 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
27315 /* The builtin without the prefix for
27316 calling it directly. */
27317 d->name + strlen ("__builtin_"),
27319 /* add_builtin_function () sets the DECL_ATTRIBUTES; now
27320 set the TYPE_ATTRIBUTES.  */
27321 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
27323 set_builtin_decl (code, decl, false);
27328 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
27329 in the current target ISA, to allow the user to compile particular modules
27330 with target-specific options that differ from the command-line options.  */
27333 ix86_init_mmx_sse_builtins (void)
27335 const struct builtin_description * d;
27336 enum ix86_builtin_func_type ftype;
27339 /* Add all special builtins with a variable number of operands.  */
27340 for (i = 0, d = bdesc_special_args;
27341 i < ARRAY_SIZE (bdesc_special_args);
27347 ftype = (enum ix86_builtin_func_type) d->flag;
27348 def_builtin (d->mask, d->name, ftype, d->code);
27351 /* Add all builtins with a variable number of operands.  */
27352 for (i = 0, d = bdesc_args;
27353 i < ARRAY_SIZE (bdesc_args);
27359 ftype = (enum ix86_builtin_func_type) d->flag;
27360 def_builtin_const (d->mask, d->name, ftype, d->code);
27363 /* pcmpestr[im] insns. */
27364 for (i = 0, d = bdesc_pcmpestr;
27365 i < ARRAY_SIZE (bdesc_pcmpestr);
27368 if (d->code == IX86_BUILTIN_PCMPESTRM128)
27369 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
27371 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
27372 def_builtin_const (d->mask, d->name, ftype, d->code);
27375 /* pcmpistr[im] insns. */
27376 for (i = 0, d = bdesc_pcmpistr;
27377 i < ARRAY_SIZE (bdesc_pcmpistr);
27380 if (d->code == IX86_BUILTIN_PCMPISTRM128)
27381 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
27383 ftype = INT_FTYPE_V16QI_V16QI_INT;
27384 def_builtin_const (d->mask, d->name, ftype, d->code);
27387 /* comi/ucomi insns. */
27388 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27390 if (d->mask == OPTION_MASK_ISA_SSE2)
27391 ftype = INT_FTYPE_V2DF_V2DF;
27393 ftype = INT_FTYPE_V4SF_V4SF;
27394 def_builtin_const (d->mask, d->name, ftype, d->code);
27398 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
27399 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
27400 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
27401 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
27403 /* SSE or 3DNow!A */
27404 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
27405 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
27406 IX86_BUILTIN_MASKMOVQ);
27409 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
27410 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
27412 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
27413 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
27414 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
27415 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
27418 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
27419 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
27420 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
27421 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
27424 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
27425 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
27426 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
27427 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
27428 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
27429 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
27430 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
27431 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
27432 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
27433 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
27434 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
27435 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
27438 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
27439 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
27442 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
27443 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
27444 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
27445 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
27446 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
27447 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
27448 IX86_BUILTIN_RDRAND64_STEP);
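/* Illustrative use (target code, not GCC itself): each *_step builtin
   stores a random value through its pointer argument and returns nonzero
   once the hardware delivers a valid value, so callers retry on failure:

       unsigned int v;
       while (!__builtin_ia32_rdrand32_step (&v))
         continue;

   immintrin.h wraps these as _rdrand16_step, _rdrand32_step and, on
   64-bit targets, _rdrand64_step (wrapper names quoted from memory).  */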
27451 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
27452 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
27453 IX86_BUILTIN_GATHERSIV2DF);
27455 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
27456 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
27457 IX86_BUILTIN_GATHERSIV4DF);
27459 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
27460 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
27461 IX86_BUILTIN_GATHERDIV2DF);
27463 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
27464 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
27465 IX86_BUILTIN_GATHERDIV4DF);
27467 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
27468 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
27469 IX86_BUILTIN_GATHERSIV4SF);
27471 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
27472 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
27473 IX86_BUILTIN_GATHERSIV8SF);
27475 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
27476 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
27477 IX86_BUILTIN_GATHERDIV4SF);
27479 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
27480 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
27481 IX86_BUILTIN_GATHERDIV8SF);
27483 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
27484 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
27485 IX86_BUILTIN_GATHERSIV2DI);
27487 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
27488 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
27489 IX86_BUILTIN_GATHERSIV4DI);
27491 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
27492 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
27493 IX86_BUILTIN_GATHERDIV2DI);
27495 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
27496 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
27497 IX86_BUILTIN_GATHERDIV4DI);
27499 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
27500 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
27501 IX86_BUILTIN_GATHERSIV4SI);
27503 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
27504 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
27505 IX86_BUILTIN_GATHERSIV8SI);
27507 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
27508 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
27509 IX86_BUILTIN_GATHERDIV4SI);
27511 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
27512 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
27513 IX86_BUILTIN_GATHERDIV8SI);
27515 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
27516 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
27517 IX86_BUILTIN_GATHERALTSIV4DF);
27519 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
27520 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
27521 IX86_BUILTIN_GATHERALTDIV8SF);
27523 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
27524 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
27525 IX86_BUILTIN_GATHERALTSIV4DI);
27527 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
27528 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
27529 IX86_BUILTIN_GATHERALTDIV8SI);
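/* Illustrative mapping (an assumption following the avx2intrin.h naming):
   these entries back the AVX2 gather intrinsics, e.g.

       __m256d
       gather_pd (double const *base, __m128i idx)
       {
         return _mm256_i32gather_pd (base, idx, 8);
       }

   which reaches __builtin_ia32_gathersiv4df with a zero source operand and
   an all-ones mask; the scale argument must be a constant 1, 2, 4 or 8.  */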
27531 /* MMX access to the vec_init patterns. */
27532 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
27533 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
27535 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
27536 V4HI_FTYPE_HI_HI_HI_HI,
27537 IX86_BUILTIN_VEC_INIT_V4HI);
27539 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
27540 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
27541 IX86_BUILTIN_VEC_INIT_V8QI);
27543 /* Access to the vec_extract patterns. */
27544 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
27545 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
27546 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
27547 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
27548 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
27549 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
27550 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
27551 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
27552 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
27553 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
27555 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
27556 "__builtin_ia32_vec_ext_v4hi",
27557 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
27559 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
27560 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
27562 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
27563 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
27565 /* Access to the vec_set patterns. */
27566 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
27567 "__builtin_ia32_vec_set_v2di",
27568 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
27570 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
27571 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
27573 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
27574 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
27576 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
27577 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
27579 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
27580 "__builtin_ia32_vec_set_v4hi",
27581 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
27583 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
27584 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
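/* Illustrative use of the vec_ext/vec_set entries (target code):

       __v4si v = { 1, 2, 3, 4 };
       int e = __builtin_ia32_vec_ext_v4si (v, 1);    e is v[1], i.e. 2
       v = __builtin_ia32_vec_set_v4si (v, 42, 1);    v is { 1, 42, 3, 4 }

   The element index is expected to be an integer constant.  These builtins
   also back intrinsics such as _mm_extract_epi32 and _mm_insert_epi32; the
   intrinsic pairing is a naming assumption worth double-checking.  */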
27586 /* Add the FMA4/FMA/XOP multi-arg builtin instructions.  */
27587 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27592 ftype = (enum ix86_builtin_func_type) d->flag;
27593 def_builtin_const (d->mask, d->name, ftype, d->code);
27597 /* Internal helper for ix86_init_builtins.  */
27600 ix86_init_builtins_va_builtins_abi (void)
27602 tree ms_va_ref, sysv_va_ref;
27603 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
27604 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
27605 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
27606 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
27610 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
27611 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
27612 ms_va_ref = build_reference_type (ms_va_list_type_node);
27614 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
27617 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
27618 fnvoid_va_start_ms =
27619 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
27620 fnvoid_va_end_sysv =
27621 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
27622 fnvoid_va_start_sysv =
27623 build_varargs_function_type_list (void_type_node, sysv_va_ref,
27625 fnvoid_va_copy_ms =
27626 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
27628 fnvoid_va_copy_sysv =
27629 build_function_type_list (void_type_node, sysv_va_ref,
27630 sysv_va_ref, NULL_TREE);
27632 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
27633 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
27634 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
27635 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
27636 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
27637 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
27638 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
27639 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
27640 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
27641 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
27642 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
27643 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
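/* Illustrative target-level use of the va builtins registered above (a
   sketch; __builtin_ms_va_list is the type name this port registers for
   the ms_abi variant):

       int __attribute__ ((ms_abi))
       sum (int n, ...)
       {
         __builtin_ms_va_list ap;
         int s = 0;
         __builtin_ms_va_start (ap, n);
         while (n-- > 0)
           s += __builtin_va_arg (ap, int);
         __builtin_ms_va_end (ap);
         return s;
       }
*/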
27647 ix86_init_builtin_types (void)
27649 tree float128_type_node, float80_type_node;
27651 /* The __float80 type. */
27652 float80_type_node = long_double_type_node;
27653 if (TYPE_MODE (float80_type_node) != XFmode)
27655 /* long double is not XFmode here; build a distinct 80-bit type.  */
27656 float80_type_node = make_node (REAL_TYPE);
27658 TYPE_PRECISION (float80_type_node) = 80;
27659 layout_type (float80_type_node);
27661 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
27663 /* The __float128 type. */
27664 float128_type_node = make_node (REAL_TYPE);
27665 TYPE_PRECISION (float128_type_node) = 128;
27666 layout_type (float128_type_node);
27667 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
27669 /* This macro is built by i386-builtin-types.awk. */
27670 DEFINE_BUILTIN_PRIMITIVE_TYPES;
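/* Illustrative: once registered, the types are usable from target code
   with GCC's floating-constant suffixes (w/W for __float80, q/Q for
   __float128):

       __float80  e = 2.7182818w;
       __float128 q = __builtin_infq ();

   __builtin_infq itself is registered just below in ix86_init_builtins.  */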
27674 ix86_init_builtins (void)
27678 ix86_init_builtin_types ();
27680 /* TFmode support builtins. */
27681 def_builtin_const (0, "__builtin_infq",
27682 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
27683 def_builtin_const (0, "__builtin_huge_valq",
27684 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
27686 /* We will expand these to normal calls if SSE2 isn't available, since
27687 they are used by libgcc.  */
27688 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
27689 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
27690 BUILT_IN_MD, "__fabstf2", NULL_TREE);
27691 TREE_READONLY (t) = 1;
27692 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
27694 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
27695 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
27696 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
27697 TREE_READONLY (t) = 1;
27698 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
27700 ix86_init_tm_builtins ();
27701 ix86_init_mmx_sse_builtins ();
27704 ix86_init_builtins_va_builtins_abi ();
27706 #ifdef SUBTARGET_INIT_BUILTINS
27707 SUBTARGET_INIT_BUILTINS;
27711 /* Return the ix86 builtin for CODE. */
27714 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
27716 if (code >= IX86_BUILTIN_MAX)
27717 return error_mark_node;
27719 return ix86_builtins[code];
27722 /* Errors in the source file can cause expand_expr to return const0_rtx
27723 where we expect a vector. To avoid crashing, use one of the vector
27724 clear instructions. */
27726 safe_vector_operand (rtx x, enum machine_mode mode)
27728 if (x == const0_rtx)
27729 x = CONST0_RTX (mode);
27733 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
27736 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
27739 tree arg0 = CALL_EXPR_ARG (exp, 0);
27740 tree arg1 = CALL_EXPR_ARG (exp, 1);
27741 rtx op0 = expand_normal (arg0);
27742 rtx op1 = expand_normal (arg1);
27743 enum machine_mode tmode = insn_data[icode].operand[0].mode;
27744 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
27745 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
27747 if (VECTOR_MODE_P (mode0))
27748 op0 = safe_vector_operand (op0, mode0);
27749 if (VECTOR_MODE_P (mode1))
27750 op1 = safe_vector_operand (op1, mode1);
27752 if (optimize || !target
27753 || GET_MODE (target) != tmode
27754 || !insn_data[icode].operand[0].predicate (target, tmode))
27755 target = gen_reg_rtx (tmode);
27757 if (GET_MODE (op1) == SImode && mode1 == TImode)
27759 rtx x = gen_reg_rtx (V4SImode);
27760 emit_insn (gen_sse2_loadd (x, op1));
27761 op1 = gen_lowpart (TImode, x);
27764 if (!insn_data[icode].operand[1].predicate (op0, mode0))
27765 op0 = copy_to_mode_reg (mode0, op0);
27766 if (!insn_data[icode].operand[2].predicate (op1, mode1))
27767 op1 = copy_to_mode_reg (mode1, op1);
27769 pat = GEN_FCN (icode) (target, op0, op1);
27778 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
27781 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
27782 enum ix86_builtin_func_type m_type,
27783 enum rtx_code sub_code)
27788 bool comparison_p = false;
27790 bool last_arg_constant = false;
27791 int num_memory = 0;
27794 enum machine_mode mode;
27797 enum machine_mode tmode = insn_data[icode].operand[0].mode;
27801 case MULTI_ARG_4_DF2_DI_I:
27802 case MULTI_ARG_4_DF2_DI_I1:
27803 case MULTI_ARG_4_SF2_SI_I:
27804 case MULTI_ARG_4_SF2_SI_I1:
27806 last_arg_constant = true;
27809 case MULTI_ARG_3_SF:
27810 case MULTI_ARG_3_DF:
27811 case MULTI_ARG_3_SF2:
27812 case MULTI_ARG_3_DF2:
27813 case MULTI_ARG_3_DI:
27814 case MULTI_ARG_3_SI:
27815 case MULTI_ARG_3_SI_DI:
27816 case MULTI_ARG_3_HI:
27817 case MULTI_ARG_3_HI_SI:
27818 case MULTI_ARG_3_QI:
27819 case MULTI_ARG_3_DI2:
27820 case MULTI_ARG_3_SI2:
27821 case MULTI_ARG_3_HI2:
27822 case MULTI_ARG_3_QI2:
27826 case MULTI_ARG_2_SF:
27827 case MULTI_ARG_2_DF:
27828 case MULTI_ARG_2_DI:
27829 case MULTI_ARG_2_SI:
27830 case MULTI_ARG_2_HI:
27831 case MULTI_ARG_2_QI:
27835 case MULTI_ARG_2_DI_IMM:
27836 case MULTI_ARG_2_SI_IMM:
27837 case MULTI_ARG_2_HI_IMM:
27838 case MULTI_ARG_2_QI_IMM:
27840 last_arg_constant = true;
27843 case MULTI_ARG_1_SF:
27844 case MULTI_ARG_1_DF:
27845 case MULTI_ARG_1_SF2:
27846 case MULTI_ARG_1_DF2:
27847 case MULTI_ARG_1_DI:
27848 case MULTI_ARG_1_SI:
27849 case MULTI_ARG_1_HI:
27850 case MULTI_ARG_1_QI:
27851 case MULTI_ARG_1_SI_DI:
27852 case MULTI_ARG_1_HI_DI:
27853 case MULTI_ARG_1_HI_SI:
27854 case MULTI_ARG_1_QI_DI:
27855 case MULTI_ARG_1_QI_SI:
27856 case MULTI_ARG_1_QI_HI:
27860 case MULTI_ARG_2_DI_CMP:
27861 case MULTI_ARG_2_SI_CMP:
27862 case MULTI_ARG_2_HI_CMP:
27863 case MULTI_ARG_2_QI_CMP:
27865 comparison_p = true;
27868 case MULTI_ARG_2_SF_TF:
27869 case MULTI_ARG_2_DF_TF:
27870 case MULTI_ARG_2_DI_TF:
27871 case MULTI_ARG_2_SI_TF:
27872 case MULTI_ARG_2_HI_TF:
27873 case MULTI_ARG_2_QI_TF:
27879 gcc_unreachable ();
27882 if (optimize || !target
27883 || GET_MODE (target) != tmode
27884 || !insn_data[icode].operand[0].predicate (target, tmode))
27885 target = gen_reg_rtx (tmode);
27887 gcc_assert (nargs <= 4);
27889 for (i = 0; i < nargs; i++)
27891 tree arg = CALL_EXPR_ARG (exp, i);
27892 rtx op = expand_normal (arg);
27893 int adjust = (comparison_p) ? 1 : 0;
27894 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
27896 if (last_arg_constant && i == nargs - 1)
27898 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
27900 enum insn_code new_icode = icode;
27903 case CODE_FOR_xop_vpermil2v2df3:
27904 case CODE_FOR_xop_vpermil2v4sf3:
27905 case CODE_FOR_xop_vpermil2v4df3:
27906 case CODE_FOR_xop_vpermil2v8sf3:
27907 error ("the last argument must be a 2-bit immediate");
27908 return gen_reg_rtx (tmode);
27909 case CODE_FOR_xop_rotlv2di3:
27910 new_icode = CODE_FOR_rotlv2di3;
27912 case CODE_FOR_xop_rotlv4si3:
27913 new_icode = CODE_FOR_rotlv4si3;
27915 case CODE_FOR_xop_rotlv8hi3:
27916 new_icode = CODE_FOR_rotlv8hi3;
27918 case CODE_FOR_xop_rotlv16qi3:
27919 new_icode = CODE_FOR_rotlv16qi3;
27921 if (CONST_INT_P (op))
27923 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
27924 op = GEN_INT (INTVAL (op) & mask);
27925 gcc_checking_assert
27926 (insn_data[icode].operand[i + 1].predicate (op, mode));
27930 gcc_checking_assert
27932 && insn_data[new_icode].operand[0].mode == tmode
27933 && insn_data[new_icode].operand[1].mode == tmode
27934 && insn_data[new_icode].operand[2].mode == mode
27935 && insn_data[new_icode].operand[0].predicate
27936 == insn_data[icode].operand[0].predicate
27937 && insn_data[new_icode].operand[1].predicate
27938 == insn_data[icode].operand[1].predicate);
27944 gcc_unreachable ();
27951 if (VECTOR_MODE_P (mode))
27952 op = safe_vector_operand (op, mode);
27954 /* If we aren't optimizing, only allow one memory operand to be generated.  */
27956 if (memory_operand (op, mode))
27959 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
27962 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
27964 op = force_reg (mode, op);
27968 args[i].mode = mode;
27974 pat = GEN_FCN (icode) (target, args[0].op);
27979 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
27980 GEN_INT ((int)sub_code));
27981 else if (! comparison_p)
27982 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
27985 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
27989 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
27994 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
27998 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
28002 gcc_unreachable ();
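/* Illustrative dispatch through the function above: a call to
   __builtin_ia32_vpcomltb reaches here with icode ==
   CODE_FOR_xop_maskcmpv16qi3, m_type == MULTI_ARG_2_QI_CMP and sub_code
   == LT, so comparison_p is set and the comparison rtx is synthesized
   with gen_rtx_fmt_ee before the insn is emitted.  */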
28012 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
28013 insns with vec_merge. */
28016 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
28020 tree arg0 = CALL_EXPR_ARG (exp, 0);
28021 rtx op1, op0 = expand_normal (arg0);
28022 enum machine_mode tmode = insn_data[icode].operand[0].mode;
28023 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
28025 if (optimize || !target
28026 || GET_MODE (target) != tmode
28027 || !insn_data[icode].operand[0].predicate (target, tmode))
28028 target = gen_reg_rtx (tmode);
28030 if (VECTOR_MODE_P (mode0))
28031 op0 = safe_vector_operand (op0, mode0);
28033 if ((optimize && !register_operand (op0, mode0))
28034 || !insn_data[icode].operand[1].predicate (op0, mode0))
28035 op0 = copy_to_mode_reg (mode0, op0);
28038 if (!insn_data[icode].operand[2].predicate (op1, mode0))
28039 op1 = copy_to_mode_reg (mode0, op1);
28041 pat = GEN_FCN (icode) (target, op0, op1);
28048 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
28051 ix86_expand_sse_compare (const struct builtin_description *d,
28052 tree exp, rtx target, bool swap)
28055 tree arg0 = CALL_EXPR_ARG (exp, 0);
28056 tree arg1 = CALL_EXPR_ARG (exp, 1);
28057 rtx op0 = expand_normal (arg0);
28058 rtx op1 = expand_normal (arg1);
28060 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
28061 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
28062 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
28063 enum rtx_code comparison = d->comparison;
28065 if (VECTOR_MODE_P (mode0))
28066 op0 = safe_vector_operand (op0, mode0);
28067 if (VECTOR_MODE_P (mode1))
28068 op1 = safe_vector_operand (op1, mode1);
28070 /* Swap operands if we have a comparison that isn't available in hardware.  */
28074 rtx tmp = gen_reg_rtx (mode1);
28075 emit_move_insn (tmp, op1);
28080 if (optimize || !target
28081 || GET_MODE (target) != tmode
28082 || !insn_data[d->icode].operand[0].predicate (target, tmode))
28083 target = gen_reg_rtx (tmode);
28085 if ((optimize && !register_operand (op0, mode0))
28086 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
28087 op0 = copy_to_mode_reg (mode0, op0);
28088 if ((optimize && !register_operand (op1, mode1))
28089 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
28090 op1 = copy_to_mode_reg (mode1, op1);
28092 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
28093 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
28100 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
28103 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
28107 tree arg0 = CALL_EXPR_ARG (exp, 0);
28108 tree arg1 = CALL_EXPR_ARG (exp, 1);
28109 rtx op0 = expand_normal (arg0);
28110 rtx op1 = expand_normal (arg1);
28111 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
28112 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
28113 enum rtx_code comparison = d->comparison;
28115 if (VECTOR_MODE_P (mode0))
28116 op0 = safe_vector_operand (op0, mode0);
28117 if (VECTOR_MODE_P (mode1))
28118 op1 = safe_vector_operand (op1, mode1);
28120 /* Swap operands if we have a comparison that isn't available in hardware.  */
28122 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
28129 target = gen_reg_rtx (SImode);
28130 emit_move_insn (target, const0_rtx);
28131 target = gen_rtx_SUBREG (QImode, target, 0);
28133 if ((optimize && !register_operand (op0, mode0))
28134 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
28135 op0 = copy_to_mode_reg (mode0, op0);
28136 if ((optimize && !register_operand (op1, mode1))
28137 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
28138 op1 = copy_to_mode_reg (mode1, op1);
28140 pat = GEN_FCN (d->icode) (op0, op1);
28144 emit_insn (gen_rtx_SET (VOIDmode,
28145 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28146 gen_rtx_fmt_ee (comparison, QImode,
28150 return SUBREG_REG (target);
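/* Illustrative path through the function above: _mm_comilt_ss (a, b)
   expands to __builtin_ia32_comilt (an xmmintrin.h pairing quoted from
   memory) and arrives here with d->comparison == LT; the comi insn sets
   the flags, and the STRICT_LOW_PART sequence materializes the 0/1 value
   returned as an int.  */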
28153 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
28156 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
28160 tree arg0 = CALL_EXPR_ARG (exp, 0);
28161 rtx op1, op0 = expand_normal (arg0);
28162 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
28163 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
28165 if (optimize || target == 0
28166 || GET_MODE (target) != tmode
28167 || !insn_data[d->icode].operand[0].predicate (target, tmode))
28168 target = gen_reg_rtx (tmode);
28170 if (VECTOR_MODE_P (mode0))
28171 op0 = safe_vector_operand (op0, mode0);
28173 if ((optimize && !register_operand (op0, mode0))
28174 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
28175 op0 = copy_to_mode_reg (mode0, op0);
28177 op1 = GEN_INT (d->comparison);
28179 pat = GEN_FCN (d->icode) (target, op0, op1);
28187 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
28188 tree exp, rtx target)
28191 tree arg0 = CALL_EXPR_ARG (exp, 0);
28192 tree arg1 = CALL_EXPR_ARG (exp, 1);
28193 rtx op0 = expand_normal (arg0);
28194 rtx op1 = expand_normal (arg1);
28196 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
28197 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
28198 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
28200 if (optimize || target == 0
28201 || GET_MODE (target) != tmode
28202 || !insn_data[d->icode].operand[0].predicate (target, tmode))
28203 target = gen_reg_rtx (tmode);
28205 op0 = safe_vector_operand (op0, mode0);
28206 op1 = safe_vector_operand (op1, mode1);
28208 if ((optimize && !register_operand (op0, mode0))
28209 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
28210 op0 = copy_to_mode_reg (mode0, op0);
28211 if ((optimize && !register_operand (op1, mode1))
28212 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
28213 op1 = copy_to_mode_reg (mode1, op1);
28215 op2 = GEN_INT (d->comparison);
28217 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
28224 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
28227 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
28231 tree arg0 = CALL_EXPR_ARG (exp, 0);
28232 tree arg1 = CALL_EXPR_ARG (exp, 1);
28233 rtx op0 = expand_normal (arg0);
28234 rtx op1 = expand_normal (arg1);
28235 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
28236 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
28237 enum rtx_code comparison = d->comparison;
28239 if (VECTOR_MODE_P (mode0))
28240 op0 = safe_vector_operand (op0, mode0);
28241 if (VECTOR_MODE_P (mode1))
28242 op1 = safe_vector_operand (op1, mode1);
28244 target = gen_reg_rtx (SImode);
28245 emit_move_insn (target, const0_rtx);
28246 target = gen_rtx_SUBREG (QImode, target, 0);
28248 if ((optimize && !register_operand (op0, mode0))
28249 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
28250 op0 = copy_to_mode_reg (mode0, op0);
28251 if ((optimize && !register_operand (op1, mode1))
28252 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
28253 op1 = copy_to_mode_reg (mode1, op1);
28255 pat = GEN_FCN (d->icode) (op0, op1);
28259 emit_insn (gen_rtx_SET (VOIDmode,
28260 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28261 gen_rtx_fmt_ee (comparison, QImode,
28265 return SUBREG_REG (target);
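/* Illustrative path: _mm_testz_si128 (a, b) reaches this through
   __builtin_ia32_ptestz128 with d->comparison == EQ; ptest sets ZF when
   (a & b) == 0, and the flag is converted to the int result by the same
   STRICT_LOW_PART idiom used for the comi builtins above.  */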
28268 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
28271 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
28272 tree exp, rtx target)
28275 tree arg0 = CALL_EXPR_ARG (exp, 0);
28276 tree arg1 = CALL_EXPR_ARG (exp, 1);
28277 tree arg2 = CALL_EXPR_ARG (exp, 2);
28278 tree arg3 = CALL_EXPR_ARG (exp, 3);
28279 tree arg4 = CALL_EXPR_ARG (exp, 4);
28280 rtx scratch0, scratch1;
28281 rtx op0 = expand_normal (arg0);
28282 rtx op1 = expand_normal (arg1);
28283 rtx op2 = expand_normal (arg2);
28284 rtx op3 = expand_normal (arg3);
28285 rtx op4 = expand_normal (arg4);
28286 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
28288 tmode0 = insn_data[d->icode].operand[0].mode;
28289 tmode1 = insn_data[d->icode].operand[1].mode;
28290 modev2 = insn_data[d->icode].operand[2].mode;
28291 modei3 = insn_data[d->icode].operand[3].mode;
28292 modev4 = insn_data[d->icode].operand[4].mode;
28293 modei5 = insn_data[d->icode].operand[5].mode;
28294 modeimm = insn_data[d->icode].operand[6].mode;
28296 if (VECTOR_MODE_P (modev2))
28297 op0 = safe_vector_operand (op0, modev2);
28298 if (VECTOR_MODE_P (modev4))
28299 op2 = safe_vector_operand (op2, modev4);
28301 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
28302 op0 = copy_to_mode_reg (modev2, op0);
28303 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
28304 op1 = copy_to_mode_reg (modei3, op1);
28305 if ((optimize && !register_operand (op2, modev4))
28306 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
28307 op2 = copy_to_mode_reg (modev4, op2);
28308 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
28309 op3 = copy_to_mode_reg (modei5, op3);
28311 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
28313 error ("the fifth argument must be an 8-bit immediate");
28317 if (d->code == IX86_BUILTIN_PCMPESTRI128)
28319 if (optimize || !target
28320 || GET_MODE (target) != tmode0
28321 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
28322 target = gen_reg_rtx (tmode0);
28324 scratch1 = gen_reg_rtx (tmode1);
28326 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
28328 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
28330 if (optimize || !target
28331 || GET_MODE (target) != tmode1
28332 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
28333 target = gen_reg_rtx (tmode1);
28335 scratch0 = gen_reg_rtx (tmode0);
28337 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
28341 gcc_assert (d->flag);
28343 scratch0 = gen_reg_rtx (tmode0);
28344 scratch1 = gen_reg_rtx (tmode1);
28346 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
28356 target = gen_reg_rtx (SImode);
28357 emit_move_insn (target, const0_rtx);
28358 target = gen_rtx_SUBREG (QImode, target, 0);
28361 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28362 gen_rtx_fmt_ee (EQ, QImode,
28363 gen_rtx_REG ((enum machine_mode) d->flag,
28366 return SUBREG_REG (target);
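/* For illustration (hypothetical user code): the ...i form returns the
   index computed in ECX, e.g.

     int idx = _mm_cmpestri (a, la, b, lb, 0x0c);

   while the flag-producing forms (d->flag nonzero) are read back
   through the EQ test on FLAGS_REG built above. */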
28373 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
28376 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
28377 tree exp, rtx target)
28380 tree arg0 = CALL_EXPR_ARG (exp, 0);
28381 tree arg1 = CALL_EXPR_ARG (exp, 1);
28382 tree arg2 = CALL_EXPR_ARG (exp, 2);
28383 rtx scratch0, scratch1;
28384 rtx op0 = expand_normal (arg0);
28385 rtx op1 = expand_normal (arg1);
28386 rtx op2 = expand_normal (arg2);
28387 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
28389 tmode0 = insn_data[d->icode].operand[0].mode;
28390 tmode1 = insn_data[d->icode].operand[1].mode;
28391 modev2 = insn_data[d->icode].operand[2].mode;
28392 modev3 = insn_data[d->icode].operand[3].mode;
28393 modeimm = insn_data[d->icode].operand[4].mode;
28395 if (VECTOR_MODE_P (modev2))
28396 op0 = safe_vector_operand (op0, modev2);
28397 if (VECTOR_MODE_P (modev3))
28398 op1 = safe_vector_operand (op1, modev3);
28400 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
28401 op0 = copy_to_mode_reg (modev2, op0);
28402 if ((optimize && !register_operand (op1, modev3))
28403 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
28404 op1 = copy_to_mode_reg (modev3, op1);
28406 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
28408 error ("the third argument must be an 8-bit immediate");
28412 if (d->code == IX86_BUILTIN_PCMPISTRI128)
28414 if (optimize || !target
28415 || GET_MODE (target) != tmode0
28416 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
28417 target = gen_reg_rtx (tmode0);
28419 scratch1 = gen_reg_rtx (tmode1);
28421 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
28423 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
28425 if (optimize || !target
28426 || GET_MODE (target) != tmode1
28427 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
28428 target = gen_reg_rtx (tmode1);
28430 scratch0 = gen_reg_rtx (tmode0);
28432 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
28436 gcc_assert (d->flag);
28438 scratch0 = gen_reg_rtx (tmode0);
28439 scratch1 = gen_reg_rtx (tmode1);
28441 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
28451 target = gen_reg_rtx (SImode);
28452 emit_move_insn (target, const0_rtx);
28453 target = gen_rtx_SUBREG (QImode, target, 0);
28456 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28457 gen_rtx_fmt_ee (EQ, QImode,
28458 gen_rtx_REG ((enum machine_mode) d->flag,
28461 return SUBREG_REG (target);
28467 /* Subroutine of ix86_expand_builtin to take care of insns with
28468 a variable number of operands. */
28471 ix86_expand_args_builtin (const struct builtin_description *d,
28472 tree exp, rtx target)
28474 rtx pat, real_target;
28475 unsigned int i, nargs;
28476 unsigned int nargs_constant = 0;
28477 int num_memory = 0;
28481 enum machine_mode mode;
28483 bool last_arg_count = false;
28484 enum insn_code icode = d->icode;
28485 const struct insn_data_d *insn_p = &insn_data[icode];
28486 enum machine_mode tmode = insn_p->operand[0].mode;
28487 enum machine_mode rmode = VOIDmode;
28489 enum rtx_code comparison = d->comparison;
28491 switch ((enum ix86_builtin_func_type) d->flag)
28493 case V2DF_FTYPE_V2DF_ROUND:
28494 case V4DF_FTYPE_V4DF_ROUND:
28495 case V4SF_FTYPE_V4SF_ROUND:
28496 case V8SF_FTYPE_V8SF_ROUND:
28497 case V4SI_FTYPE_V4SF_ROUND:
28498 case V8SI_FTYPE_V8SF_ROUND:
28499 return ix86_expand_sse_round (d, exp, target);
28500 case V4SI_FTYPE_V2DF_V2DF_ROUND:
28501 case V8SI_FTYPE_V4DF_V4DF_ROUND:
28502 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
28503 case INT_FTYPE_V8SF_V8SF_PTEST:
28504 case INT_FTYPE_V4DI_V4DI_PTEST:
28505 case INT_FTYPE_V4DF_V4DF_PTEST:
28506 case INT_FTYPE_V4SF_V4SF_PTEST:
28507 case INT_FTYPE_V2DI_V2DI_PTEST:
28508 case INT_FTYPE_V2DF_V2DF_PTEST:
28509 return ix86_expand_sse_ptest (d, exp, target);
28510 case FLOAT128_FTYPE_FLOAT128:
28511 case FLOAT_FTYPE_FLOAT:
28512 case INT_FTYPE_INT:
28513 case UINT64_FTYPE_INT:
28514 case UINT16_FTYPE_UINT16:
28515 case INT64_FTYPE_INT64:
28516 case INT64_FTYPE_V4SF:
28517 case INT64_FTYPE_V2DF:
28518 case INT_FTYPE_V16QI:
28519 case INT_FTYPE_V8QI:
28520 case INT_FTYPE_V8SF:
28521 case INT_FTYPE_V4DF:
28522 case INT_FTYPE_V4SF:
28523 case INT_FTYPE_V2DF:
28524 case INT_FTYPE_V32QI:
28525 case V16QI_FTYPE_V16QI:
28526 case V8SI_FTYPE_V8SF:
28527 case V8SI_FTYPE_V4SI:
28528 case V8HI_FTYPE_V8HI:
28529 case V8HI_FTYPE_V16QI:
28530 case V8QI_FTYPE_V8QI:
28531 case V8SF_FTYPE_V8SF:
28532 case V8SF_FTYPE_V8SI:
28533 case V8SF_FTYPE_V4SF:
28534 case V8SF_FTYPE_V8HI:
28535 case V4SI_FTYPE_V4SI:
28536 case V4SI_FTYPE_V16QI:
28537 case V4SI_FTYPE_V4SF:
28538 case V4SI_FTYPE_V8SI:
28539 case V4SI_FTYPE_V8HI:
28540 case V4SI_FTYPE_V4DF:
28541 case V4SI_FTYPE_V2DF:
28542 case V4HI_FTYPE_V4HI:
28543 case V4DF_FTYPE_V4DF:
28544 case V4DF_FTYPE_V4SI:
28545 case V4DF_FTYPE_V4SF:
28546 case V4DF_FTYPE_V2DF:
28547 case V4SF_FTYPE_V4SF:
28548 case V4SF_FTYPE_V4SI:
28549 case V4SF_FTYPE_V8SF:
28550 case V4SF_FTYPE_V4DF:
28551 case V4SF_FTYPE_V8HI:
28552 case V4SF_FTYPE_V2DF:
28553 case V2DI_FTYPE_V2DI:
28554 case V2DI_FTYPE_V16QI:
28555 case V2DI_FTYPE_V8HI:
28556 case V2DI_FTYPE_V4SI:
28557 case V2DF_FTYPE_V2DF:
28558 case V2DF_FTYPE_V4SI:
28559 case V2DF_FTYPE_V4DF:
28560 case V2DF_FTYPE_V4SF:
28561 case V2DF_FTYPE_V2SI:
28562 case V2SI_FTYPE_V2SI:
28563 case V2SI_FTYPE_V4SF:
28564 case V2SI_FTYPE_V2SF:
28565 case V2SI_FTYPE_V2DF:
28566 case V2SF_FTYPE_V2SF:
28567 case V2SF_FTYPE_V2SI:
28568 case V32QI_FTYPE_V32QI:
28569 case V32QI_FTYPE_V16QI:
28570 case V16HI_FTYPE_V16HI:
28571 case V16HI_FTYPE_V8HI:
28572 case V8SI_FTYPE_V8SI:
28573 case V16HI_FTYPE_V16QI:
28574 case V8SI_FTYPE_V16QI:
28575 case V4DI_FTYPE_V16QI:
28576 case V8SI_FTYPE_V8HI:
28577 case V4DI_FTYPE_V8HI:
28578 case V4DI_FTYPE_V4SI:
28579 case V4DI_FTYPE_V2DI:
28582 case V4SF_FTYPE_V4SF_VEC_MERGE:
28583 case V2DF_FTYPE_V2DF_VEC_MERGE:
28584 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
28585 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
28586 case V16QI_FTYPE_V16QI_V16QI:
28587 case V16QI_FTYPE_V8HI_V8HI:
28588 case V8QI_FTYPE_V8QI_V8QI:
28589 case V8QI_FTYPE_V4HI_V4HI:
28590 case V8HI_FTYPE_V8HI_V8HI:
28591 case V8HI_FTYPE_V16QI_V16QI:
28592 case V8HI_FTYPE_V4SI_V4SI:
28593 case V8SF_FTYPE_V8SF_V8SF:
28594 case V8SF_FTYPE_V8SF_V8SI:
28595 case V4SI_FTYPE_V4SI_V4SI:
28596 case V4SI_FTYPE_V8HI_V8HI:
28597 case V4SI_FTYPE_V4SF_V4SF:
28598 case V4SI_FTYPE_V2DF_V2DF:
28599 case V4HI_FTYPE_V4HI_V4HI:
28600 case V4HI_FTYPE_V8QI_V8QI:
28601 case V4HI_FTYPE_V2SI_V2SI:
28602 case V4DF_FTYPE_V4DF_V4DF:
28603 case V4DF_FTYPE_V4DF_V4DI:
28604 case V4SF_FTYPE_V4SF_V4SF:
28605 case V4SF_FTYPE_V4SF_V4SI:
28606 case V4SF_FTYPE_V4SF_V2SI:
28607 case V4SF_FTYPE_V4SF_V2DF:
28608 case V4SF_FTYPE_V4SF_DI:
28609 case V4SF_FTYPE_V4SF_SI:
28610 case V2DI_FTYPE_V2DI_V2DI:
28611 case V2DI_FTYPE_V16QI_V16QI:
28612 case V2DI_FTYPE_V4SI_V4SI:
28613 case V2DI_FTYPE_V2DI_V16QI:
28614 case V2DI_FTYPE_V2DF_V2DF:
28615 case V2SI_FTYPE_V2SI_V2SI:
28616 case V2SI_FTYPE_V4HI_V4HI:
28617 case V2SI_FTYPE_V2SF_V2SF:
28618 case V2DF_FTYPE_V2DF_V2DF:
28619 case V2DF_FTYPE_V2DF_V4SF:
28620 case V2DF_FTYPE_V2DF_V2DI:
28621 case V2DF_FTYPE_V2DF_DI:
28622 case V2DF_FTYPE_V2DF_SI:
28623 case V2SF_FTYPE_V2SF_V2SF:
28624 case V1DI_FTYPE_V1DI_V1DI:
28625 case V1DI_FTYPE_V8QI_V8QI:
28626 case V1DI_FTYPE_V2SI_V2SI:
28627 case V32QI_FTYPE_V16HI_V16HI:
28628 case V16HI_FTYPE_V8SI_V8SI:
28629 case V32QI_FTYPE_V32QI_V32QI:
28630 case V16HI_FTYPE_V32QI_V32QI:
28631 case V16HI_FTYPE_V16HI_V16HI:
28632 case V8SI_FTYPE_V4DF_V4DF:
28633 case V8SI_FTYPE_V8SI_V8SI:
28634 case V8SI_FTYPE_V16HI_V16HI:
28635 case V4DI_FTYPE_V4DI_V4DI:
28636 case V4DI_FTYPE_V8SI_V8SI:
28637 if (comparison == UNKNOWN)
28638 return ix86_expand_binop_builtin (icode, exp, target);
28641 case V4SF_FTYPE_V4SF_V4SF_SWAP:
28642 case V2DF_FTYPE_V2DF_V2DF_SWAP:
28643 gcc_assert (comparison != UNKNOWN);
28647 case V16HI_FTYPE_V16HI_V8HI_COUNT:
28648 case V16HI_FTYPE_V16HI_SI_COUNT:
28649 case V8SI_FTYPE_V8SI_V4SI_COUNT:
28650 case V8SI_FTYPE_V8SI_SI_COUNT:
28651 case V4DI_FTYPE_V4DI_V2DI_COUNT:
28652 case V4DI_FTYPE_V4DI_INT_COUNT:
28653 case V8HI_FTYPE_V8HI_V8HI_COUNT:
28654 case V8HI_FTYPE_V8HI_SI_COUNT:
28655 case V4SI_FTYPE_V4SI_V4SI_COUNT:
28656 case V4SI_FTYPE_V4SI_SI_COUNT:
28657 case V4HI_FTYPE_V4HI_V4HI_COUNT:
28658 case V4HI_FTYPE_V4HI_SI_COUNT:
28659 case V2DI_FTYPE_V2DI_V2DI_COUNT:
28660 case V2DI_FTYPE_V2DI_SI_COUNT:
28661 case V2SI_FTYPE_V2SI_V2SI_COUNT:
28662 case V2SI_FTYPE_V2SI_SI_COUNT:
28663 case V1DI_FTYPE_V1DI_V1DI_COUNT:
28664 case V1DI_FTYPE_V1DI_SI_COUNT:
28666 last_arg_count = true;
28668 case UINT64_FTYPE_UINT64_UINT64:
28669 case UINT_FTYPE_UINT_UINT:
28670 case UINT_FTYPE_UINT_USHORT:
28671 case UINT_FTYPE_UINT_UCHAR:
28672 case UINT16_FTYPE_UINT16_INT:
28673 case UINT8_FTYPE_UINT8_INT:
28676 case V2DI_FTYPE_V2DI_INT_CONVERT:
28679 nargs_constant = 1;
28681 case V4DI_FTYPE_V4DI_INT_CONVERT:
28684 nargs_constant = 1;
28686 case V8HI_FTYPE_V8HI_INT:
28687 case V8HI_FTYPE_V8SF_INT:
28688 case V8HI_FTYPE_V4SF_INT:
28689 case V8SF_FTYPE_V8SF_INT:
28690 case V4SI_FTYPE_V4SI_INT:
28691 case V4SI_FTYPE_V8SI_INT:
28692 case V4HI_FTYPE_V4HI_INT:
28693 case V4DF_FTYPE_V4DF_INT:
28694 case V4SF_FTYPE_V4SF_INT:
28695 case V4SF_FTYPE_V8SF_INT:
28696 case V2DI_FTYPE_V2DI_INT:
28697 case V2DF_FTYPE_V2DF_INT:
28698 case V2DF_FTYPE_V4DF_INT:
28699 case V16HI_FTYPE_V16HI_INT:
28700 case V8SI_FTYPE_V8SI_INT:
28701 case V4DI_FTYPE_V4DI_INT:
28702 case V2DI_FTYPE_V4DI_INT:
28704 nargs_constant = 1;
28706 case V16QI_FTYPE_V16QI_V16QI_V16QI:
28707 case V8SF_FTYPE_V8SF_V8SF_V8SF:
28708 case V4DF_FTYPE_V4DF_V4DF_V4DF:
28709 case V4SF_FTYPE_V4SF_V4SF_V4SF:
28710 case V2DF_FTYPE_V2DF_V2DF_V2DF:
28711 case V32QI_FTYPE_V32QI_V32QI_V32QI:
28714 case V32QI_FTYPE_V32QI_V32QI_INT:
28715 case V16HI_FTYPE_V16HI_V16HI_INT:
28716 case V16QI_FTYPE_V16QI_V16QI_INT:
28717 case V4DI_FTYPE_V4DI_V4DI_INT:
28718 case V8HI_FTYPE_V8HI_V8HI_INT:
28719 case V8SI_FTYPE_V8SI_V8SI_INT:
28720 case V8SI_FTYPE_V8SI_V4SI_INT:
28721 case V8SF_FTYPE_V8SF_V8SF_INT:
28722 case V8SF_FTYPE_V8SF_V4SF_INT:
28723 case V4SI_FTYPE_V4SI_V4SI_INT:
28724 case V4DF_FTYPE_V4DF_V4DF_INT:
28725 case V4DF_FTYPE_V4DF_V2DF_INT:
28726 case V4SF_FTYPE_V4SF_V4SF_INT:
28727 case V2DI_FTYPE_V2DI_V2DI_INT:
28728 case V4DI_FTYPE_V4DI_V2DI_INT:
28729 case V2DF_FTYPE_V2DF_V2DF_INT:
28731 nargs_constant = 1;
28733 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
28736 nargs_constant = 1;
28738 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
28741 nargs_constant = 1;
28743 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
28746 nargs_constant = 1;
28748 case V2DI_FTYPE_V2DI_UINT_UINT:
28750 nargs_constant = 2;
28752 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
28753 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
28754 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
28755 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
28757 nargs_constant = 1;
28759 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
28761 nargs_constant = 2;
28764 gcc_unreachable ();
28767 gcc_assert (nargs <= ARRAY_SIZE (args));
28769 if (comparison != UNKNOWN)
28771 gcc_assert (nargs == 2);
28772 return ix86_expand_sse_compare (d, exp, target, swap);
28775 if (rmode == VOIDmode || rmode == tmode)
28779 || GET_MODE (target) != tmode
28780 || !insn_p->operand[0].predicate (target, tmode))
28781 target = gen_reg_rtx (tmode);
28782 real_target = target;
28786 target = gen_reg_rtx (rmode);
28787 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
28790 for (i = 0; i < nargs; i++)
28792 tree arg = CALL_EXPR_ARG (exp, i);
28793 rtx op = expand_normal (arg);
28794 enum machine_mode mode = insn_p->operand[i + 1].mode;
28795 bool match = insn_p->operand[i + 1].predicate (op, mode);
28797 if (last_arg_count && (i + 1) == nargs)
28799 /* SIMD shift insns take either an 8-bit immediate or a
28800 register as the count. But the builtin functions take an int as
28801 the count. If the count doesn't match, we put it in a register. */
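/* For illustration (hypothetical user code): _mm_slli_epi32 (v, 3)
   supplies a CONST_INT that satisfies the insn predicate directly,
   whereas _mm_slli_epi32 (v, n) with a variable n takes this path
   and has its count forced into a register. */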
28804 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
28805 if (!insn_p->operand[i + 1].predicate (op, mode))
28806 op = copy_to_reg (op);
28809 else if ((nargs - i) <= nargs_constant)
28814 case CODE_FOR_avx2_inserti128:
28815 case CODE_FOR_avx2_extracti128:
28816 error ("the last argument must be a 1-bit immediate");
28819 case CODE_FOR_sse4_1_roundsd:
28820 case CODE_FOR_sse4_1_roundss:
28822 case CODE_FOR_sse4_1_roundpd:
28823 case CODE_FOR_sse4_1_roundps:
28824 case CODE_FOR_avx_roundpd256:
28825 case CODE_FOR_avx_roundps256:
28827 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
28828 case CODE_FOR_sse4_1_roundps_sfix:
28829 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
28830 case CODE_FOR_avx_roundps_sfix256:
28832 case CODE_FOR_sse4_1_blendps:
28833 case CODE_FOR_avx_blendpd256:
28834 case CODE_FOR_avx_vpermilv4df:
28835 error ("the last argument must be a 4-bit immediate");
28838 case CODE_FOR_sse4_1_blendpd:
28839 case CODE_FOR_avx_vpermilv2df:
28840 case CODE_FOR_xop_vpermil2v2df3:
28841 case CODE_FOR_xop_vpermil2v4sf3:
28842 case CODE_FOR_xop_vpermil2v4df3:
28843 case CODE_FOR_xop_vpermil2v8sf3:
28844 error ("the last argument must be a 2-bit immediate");
28847 case CODE_FOR_avx_vextractf128v4df:
28848 case CODE_FOR_avx_vextractf128v8sf:
28849 case CODE_FOR_avx_vextractf128v8si:
28850 case CODE_FOR_avx_vinsertf128v4df:
28851 case CODE_FOR_avx_vinsertf128v8sf:
28852 case CODE_FOR_avx_vinsertf128v8si:
28853 error ("the last argument must be a 1-bit immediate");
28856 case CODE_FOR_avx_vmcmpv2df3:
28857 case CODE_FOR_avx_vmcmpv4sf3:
28858 case CODE_FOR_avx_cmpv2df3:
28859 case CODE_FOR_avx_cmpv4sf3:
28860 case CODE_FOR_avx_cmpv4df3:
28861 case CODE_FOR_avx_cmpv8sf3:
28862 error ("the last argument must be a 5-bit immediate");
28866 switch (nargs_constant)
28869 if ((nargs - i) == nargs_constant)
28871 error ("the next to last argument must be an 8-bit immediate");
28875 error ("the last argument must be an 8-bit immediate");
28878 gcc_unreachable ();
28885 if (VECTOR_MODE_P (mode))
28886 op = safe_vector_operand (op, mode);
28888 /* If we aren't optimizing, only allow one memory operand to
28889 be generated. */
28890 if (memory_operand (op, mode))
28893 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
28895 if (optimize || !match || num_memory > 1)
28896 op = copy_to_mode_reg (mode, op);
28900 op = copy_to_reg (op);
28901 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
28906 args[i].mode = mode;
28912 pat = GEN_FCN (icode) (real_target, args[0].op);
28915 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
28918 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
28922 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
28923 args[2].op, args[3].op);
28926 gcc_unreachable ();
28936 /* Subroutine of ix86_expand_builtin to take care of special insns
28937 with a variable number of operands. */
28940 ix86_expand_special_args_builtin (const struct builtin_description *d,
28941 tree exp, rtx target)
28945 unsigned int i, nargs, arg_adjust, memory;
28949 enum machine_mode mode;
28951 enum insn_code icode = d->icode;
28952 bool last_arg_constant = false;
28953 const struct insn_data_d *insn_p = &insn_data[icode];
28954 enum machine_mode tmode = insn_p->operand[0].mode;
28955 enum { load, store } klass;
28957 switch ((enum ix86_builtin_func_type) d->flag)
28959 case VOID_FTYPE_VOID:
28960 if (icode == CODE_FOR_avx_vzeroupper)
28961 target = GEN_INT (vzeroupper_intrinsic);
28962 emit_insn (GEN_FCN (icode) (target));
28964 case VOID_FTYPE_UINT64:
28965 case VOID_FTYPE_UNSIGNED:
28970 case UINT64_FTYPE_VOID:
28971 case UNSIGNED_FTYPE_VOID:
28976 case UINT64_FTYPE_PUNSIGNED:
28977 case V2DI_FTYPE_PV2DI:
28978 case V4DI_FTYPE_PV4DI:
28979 case V32QI_FTYPE_PCCHAR:
28980 case V16QI_FTYPE_PCCHAR:
28981 case V8SF_FTYPE_PCV4SF:
28982 case V8SF_FTYPE_PCFLOAT:
28983 case V4SF_FTYPE_PCFLOAT:
28984 case V4DF_FTYPE_PCV2DF:
28985 case V4DF_FTYPE_PCDOUBLE:
28986 case V2DF_FTYPE_PCDOUBLE:
28987 case VOID_FTYPE_PVOID:
28992 case VOID_FTYPE_PV2SF_V4SF:
28993 case VOID_FTYPE_PV4DI_V4DI:
28994 case VOID_FTYPE_PV2DI_V2DI:
28995 case VOID_FTYPE_PCHAR_V32QI:
28996 case VOID_FTYPE_PCHAR_V16QI:
28997 case VOID_FTYPE_PFLOAT_V8SF:
28998 case VOID_FTYPE_PFLOAT_V4SF:
28999 case VOID_FTYPE_PDOUBLE_V4DF:
29000 case VOID_FTYPE_PDOUBLE_V2DF:
29001 case VOID_FTYPE_PLONGLONG_LONGLONG:
29002 case VOID_FTYPE_PULONGLONG_ULONGLONG:
29003 case VOID_FTYPE_PINT_INT:
29006 /* Reserve memory operand for target. */
29007 memory = ARRAY_SIZE (args);
29009 case V4SF_FTYPE_V4SF_PCV2SF:
29010 case V2DF_FTYPE_V2DF_PCDOUBLE:
29015 case V8SF_FTYPE_PCV8SF_V8SI:
29016 case V4DF_FTYPE_PCV4DF_V4DI:
29017 case V4SF_FTYPE_PCV4SF_V4SI:
29018 case V2DF_FTYPE_PCV2DF_V2DI:
29019 case V8SI_FTYPE_PCV8SI_V8SI:
29020 case V4DI_FTYPE_PCV4DI_V4DI:
29021 case V4SI_FTYPE_PCV4SI_V4SI:
29022 case V2DI_FTYPE_PCV2DI_V2DI:
29027 case VOID_FTYPE_PV8SF_V8SI_V8SF:
29028 case VOID_FTYPE_PV4DF_V4DI_V4DF:
29029 case VOID_FTYPE_PV4SF_V4SI_V4SF:
29030 case VOID_FTYPE_PV2DF_V2DI_V2DF:
29031 case VOID_FTYPE_PV8SI_V8SI_V8SI:
29032 case VOID_FTYPE_PV4DI_V4DI_V4DI:
29033 case VOID_FTYPE_PV4SI_V4SI_V4SI:
29034 case VOID_FTYPE_PV2DI_V2DI_V2DI:
29037 /* Reserve memory operand for target. */
29038 memory = ARRAY_SIZE (args);
29040 case VOID_FTYPE_UINT_UINT_UINT:
29041 case VOID_FTYPE_UINT64_UINT_UINT:
29042 case UCHAR_FTYPE_UINT_UINT_UINT:
29043 case UCHAR_FTYPE_UINT64_UINT_UINT:
29046 memory = ARRAY_SIZE (args);
29047 last_arg_constant = true;
29050 gcc_unreachable ();
29053 gcc_assert (nargs <= ARRAY_SIZE (args));
29055 if (klass == store)
29057 arg = CALL_EXPR_ARG (exp, 0);
29058 op = expand_normal (arg);
29059 gcc_assert (target == 0);
29062 if (GET_MODE (op) != Pmode)
29063 op = convert_to_mode (Pmode, op, 1);
29064 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
29067 target = force_reg (tmode, op);
29075 || !register_operand (target, tmode)
29076 || GET_MODE (target) != tmode)
29077 target = gen_reg_rtx (tmode);
29080 for (i = 0; i < nargs; i++)
29082 enum machine_mode mode = insn_p->operand[i + 1].mode;
29085 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
29086 op = expand_normal (arg);
29087 match = insn_p->operand[i + 1].predicate (op, mode);
29089 if (last_arg_constant && (i + 1) == nargs)
29093 if (icode == CODE_FOR_lwp_lwpvalsi3
29094 || icode == CODE_FOR_lwp_lwpinssi3
29095 || icode == CODE_FOR_lwp_lwpvaldi3
29096 || icode == CODE_FOR_lwp_lwpinsdi3)
29097 error ("the last argument must be a 32-bit immediate");
29099 error ("the last argument must be an 8-bit immediate");
29107 /* This must be the memory operand. */
29108 if (GET_MODE (op) != Pmode)
29109 op = convert_to_mode (Pmode, op, 1);
29110 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
29111 gcc_assert (GET_MODE (op) == mode
29112 || GET_MODE (op) == VOIDmode);
29116 /* This must be a register. */
29117 if (VECTOR_MODE_P (mode))
29118 op = safe_vector_operand (op, mode);
29120 gcc_assert (GET_MODE (op) == mode
29121 || GET_MODE (op) == VOIDmode);
29122 op = copy_to_mode_reg (mode, op);
29127 args[i].mode = mode;
29133 pat = GEN_FCN (icode) (target);
29136 pat = GEN_FCN (icode) (target, args[0].op);
29139 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
29142 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
29145 gcc_unreachable ();
29151 return klass == store ? 0 : target;
29154 /* Return the integer constant in ARG. Constrain it to be in the range
29155 of the subparts of VEC_TYPE; issue an error if not. */
29158 get_element_number (tree vec_type, tree arg)
29160 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
29162 if (!host_integerp (arg, 1)
29163 || (elt = tree_low_cst (arg, 1), elt > max))
29165 error ("selector must be an integer constant in the range 0..%wi", max);
29172 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29173 ix86_expand_vector_init. We DO have language-level syntax for this, in
29174 the form of (type){ init-list }. Except that since we can't place emms
29175 instructions from inside the compiler, we can't allow the use of MMX
29176 registers unless the user explicitly asks for it. So we do *not* define
29177 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
29178 we have builtins invoked by mmintrin.h that give us license to emit
29179 these sorts of instructions. */
29182 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
29184 enum machine_mode tmode = TYPE_MODE (type);
29185 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
29186 int i, n_elt = GET_MODE_NUNITS (tmode);
29187 rtvec v = rtvec_alloc (n_elt);
29189 gcc_assert (VECTOR_MODE_P (tmode));
29190 gcc_assert (call_expr_nargs (exp) == n_elt);
29192 for (i = 0; i < n_elt; ++i)
29194 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
29195 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
29198 if (!target || !register_operand (target, tmode))
29199 target = gen_reg_rtx (tmode);
29201 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
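/* For illustration (hypothetical user code): mmintrin.h implements

     __m64 v = _mm_set_pi32 (e1, e0);

   as __builtin_ia32_vec_init_v2si (e0, e1), which arrives here and is
   expanded by ix86_expand_vector_init above. */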
29205 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29206 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
29207 had a language-level syntax for referencing vector elements. */
29210 ix86_expand_vec_ext_builtin (tree exp, rtx target)
29212 enum machine_mode tmode, mode0;
29217 arg0 = CALL_EXPR_ARG (exp, 0);
29218 arg1 = CALL_EXPR_ARG (exp, 1);
29220 op0 = expand_normal (arg0);
29221 elt = get_element_number (TREE_TYPE (arg0), arg1);
29223 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
29224 mode0 = TYPE_MODE (TREE_TYPE (arg0));
29225 gcc_assert (VECTOR_MODE_P (mode0));
29227 op0 = force_reg (mode0, op0);
29229 if (optimize || !target || !register_operand (target, tmode))
29230 target = gen_reg_rtx (tmode);
29232 ix86_expand_vector_extract (true, target, op0, elt);
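/* For illustration (hypothetical user code):

     int e = _mm_extract_epi16 (v, 3);

   arrives here as __builtin_ia32_vec_ext_v8hi (v, 3); the selector is
   range-checked by get_element_number above. */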
29237 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29238 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
29239 a language-level syntax for referencing vector elements. */
29242 ix86_expand_vec_set_builtin (tree exp)
29244 enum machine_mode tmode, mode1;
29245 tree arg0, arg1, arg2;
29247 rtx op0, op1, target;
29249 arg0 = CALL_EXPR_ARG (exp, 0);
29250 arg1 = CALL_EXPR_ARG (exp, 1);
29251 arg2 = CALL_EXPR_ARG (exp, 2);
29253 tmode = TYPE_MODE (TREE_TYPE (arg0));
29254 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
29255 gcc_assert (VECTOR_MODE_P (tmode));
29257 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
29258 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
29259 elt = get_element_number (TREE_TYPE (arg0), arg2);
29261 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
29262 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
29264 op0 = force_reg (tmode, op0);
29265 op1 = force_reg (mode1, op1);
29267 /* OP0 is the source of these builtin functions and shouldn't be
29268 modified. Create a copy, use it and return it as target. */
29269 target = gen_reg_rtx (tmode);
29270 emit_move_insn (target, op0);
29271 ix86_expand_vector_set (true, target, op1, elt);
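/* For illustration (hypothetical user code):

     __m128i w = _mm_insert_epi16 (v, x, 3);

   arrives here as __builtin_ia32_vec_set_v8hi; note how the original
   vector operand is copied rather than clobbered. */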
29276 /* Expand an expression EXP that calls a built-in function,
29277 with result going to TARGET if that's convenient
29278 (and in mode MODE if that's convenient).
29279 SUBTARGET may be used as the target for computing one of EXP's operands.
29280 IGNORE is nonzero if the value is to be ignored. */
29283 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
29284 enum machine_mode mode ATTRIBUTE_UNUSED,
29285 int ignore ATTRIBUTE_UNUSED)
29287 const struct builtin_description *d;
29289 enum insn_code icode;
29290 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
29291 tree arg0, arg1, arg2, arg3, arg4;
29292 rtx op0, op1, op2, op3, op4, pat;
29293 enum machine_mode mode0, mode1, mode2, mode3, mode4;
29294 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
29296 /* Determine whether the builtin function is available under the current ISA.
29297 Originally the builtin was not created if it wasn't applicable to the
29298 current ISA based on the command line switches. With function specific
29299 options, we need to check in the context of the function making the call
29300 whether it is supported. */
29301 if (ix86_builtins_isa[fcode].isa
29302 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
29304 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
29305 NULL, (enum fpmath_unit) 0, false);
29308 error ("%qE needs unknown isa option", fndecl);
29311 gcc_assert (opts != NULL);
29312 error ("%qE needs isa option %s", fndecl, opts);
29320 case IX86_BUILTIN_MASKMOVQ:
29321 case IX86_BUILTIN_MASKMOVDQU:
29322 icode = (fcode == IX86_BUILTIN_MASKMOVQ
29323 ? CODE_FOR_mmx_maskmovq
29324 : CODE_FOR_sse2_maskmovdqu);
29325 /* Note the arg order is different from the operand order. */
29326 arg1 = CALL_EXPR_ARG (exp, 0);
29327 arg2 = CALL_EXPR_ARG (exp, 1);
29328 arg0 = CALL_EXPR_ARG (exp, 2);
29329 op0 = expand_normal (arg0);
29330 op1 = expand_normal (arg1);
29331 op2 = expand_normal (arg2);
29332 mode0 = insn_data[icode].operand[0].mode;
29333 mode1 = insn_data[icode].operand[1].mode;
29334 mode2 = insn_data[icode].operand[2].mode;
29336 if (GET_MODE (op0) != Pmode)
29337 op0 = convert_to_mode (Pmode, op0, 1);
29338 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
29340 if (!insn_data[icode].operand[0].predicate (op0, mode0))
29341 op0 = copy_to_mode_reg (mode0, op0);
29342 if (!insn_data[icode].operand[1].predicate (op1, mode1))
29343 op1 = copy_to_mode_reg (mode1, op1);
29344 if (!insn_data[icode].operand[2].predicate (op2, mode2))
29345 op2 = copy_to_mode_reg (mode2, op2);
29346 pat = GEN_FCN (icode) (op0, op1, op2);
29352 case IX86_BUILTIN_LDMXCSR:
29353 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
29354 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
29355 emit_move_insn (target, op0);
29356 emit_insn (gen_sse_ldmxcsr (target));
29359 case IX86_BUILTIN_STMXCSR:
29360 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
29361 emit_insn (gen_sse_stmxcsr (target));
29362 return copy_to_mode_reg (SImode, target);
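/* For illustration (hypothetical user code):

     unsigned int csr = _mm_getcsr ();
     _mm_setcsr (csr | 0x8040);

   sets the FTZ (bit 15) and DAZ (bit 6) control bits. Both expansions
   go through a stack slot because STMXCSR and LDMXCSR only accept
   memory operands. */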
29364 case IX86_BUILTIN_CLFLUSH:
29365 arg0 = CALL_EXPR_ARG (exp, 0);
29366 op0 = expand_normal (arg0);
29367 icode = CODE_FOR_sse2_clflush;
29368 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
29370 if (GET_MODE (op0) != Pmode)
29371 op0 = convert_to_mode (Pmode, op0, 1);
29372 op0 = force_reg (Pmode, op0);
29375 emit_insn (gen_sse2_clflush (op0));
29378 case IX86_BUILTIN_MONITOR:
29379 arg0 = CALL_EXPR_ARG (exp, 0);
29380 arg1 = CALL_EXPR_ARG (exp, 1);
29381 arg2 = CALL_EXPR_ARG (exp, 2);
29382 op0 = expand_normal (arg0);
29383 op1 = expand_normal (arg1);
29384 op2 = expand_normal (arg2);
29387 if (GET_MODE (op0) != Pmode)
29388 op0 = convert_to_mode (Pmode, op0, 1);
29389 op0 = force_reg (Pmode, op0);
29392 op1 = copy_to_mode_reg (SImode, op1);
29394 op2 = copy_to_mode_reg (SImode, op2);
29395 emit_insn (ix86_gen_monitor (op0, op1, op2));
29398 case IX86_BUILTIN_MWAIT:
29399 arg0 = CALL_EXPR_ARG (exp, 0);
29400 arg1 = CALL_EXPR_ARG (exp, 1);
29401 op0 = expand_normal (arg0);
29402 op1 = expand_normal (arg1);
29404 op0 = copy_to_mode_reg (SImode, op0);
29406 op1 = copy_to_mode_reg (SImode, op1);
29407 emit_insn (gen_sse3_mwait (op0, op1));
29410 case IX86_BUILTIN_VEC_INIT_V2SI:
29411 case IX86_BUILTIN_VEC_INIT_V4HI:
29412 case IX86_BUILTIN_VEC_INIT_V8QI:
29413 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
29415 case IX86_BUILTIN_VEC_EXT_V2DF:
29416 case IX86_BUILTIN_VEC_EXT_V2DI:
29417 case IX86_BUILTIN_VEC_EXT_V4SF:
29418 case IX86_BUILTIN_VEC_EXT_V4SI:
29419 case IX86_BUILTIN_VEC_EXT_V8HI:
29420 case IX86_BUILTIN_VEC_EXT_V2SI:
29421 case IX86_BUILTIN_VEC_EXT_V4HI:
29422 case IX86_BUILTIN_VEC_EXT_V16QI:
29423 return ix86_expand_vec_ext_builtin (exp, target);
29425 case IX86_BUILTIN_VEC_SET_V2DI:
29426 case IX86_BUILTIN_VEC_SET_V4SF:
29427 case IX86_BUILTIN_VEC_SET_V4SI:
29428 case IX86_BUILTIN_VEC_SET_V8HI:
29429 case IX86_BUILTIN_VEC_SET_V4HI:
29430 case IX86_BUILTIN_VEC_SET_V16QI:
29431 return ix86_expand_vec_set_builtin (exp);
29433 case IX86_BUILTIN_INFQ:
29434 case IX86_BUILTIN_HUGE_VALQ:
29436 REAL_VALUE_TYPE inf;
29440 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
29442 tmp = validize_mem (force_const_mem (mode, tmp));
29445 target = gen_reg_rtx (mode);
29447 emit_move_insn (target, tmp);
29451 case IX86_BUILTIN_LLWPCB:
29452 arg0 = CALL_EXPR_ARG (exp, 0);
29453 op0 = expand_normal (arg0);
29454 icode = CODE_FOR_lwp_llwpcb;
29455 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
29457 if (GET_MODE (op0) != Pmode)
29458 op0 = convert_to_mode (Pmode, op0, 1);
29459 op0 = force_reg (Pmode, op0);
29461 emit_insn (gen_lwp_llwpcb (op0));
29464 case IX86_BUILTIN_SLWPCB:
29465 icode = CODE_FOR_lwp_slwpcb;
29467 || !insn_data[icode].operand[0].predicate (target, Pmode))
29468 target = gen_reg_rtx (Pmode);
29469 emit_insn (gen_lwp_slwpcb (target));
29472 case IX86_BUILTIN_BEXTRI32:
29473 case IX86_BUILTIN_BEXTRI64:
29474 arg0 = CALL_EXPR_ARG (exp, 0);
29475 arg1 = CALL_EXPR_ARG (exp, 1);
29476 op0 = expand_normal (arg0);
29477 op1 = expand_normal (arg1);
29478 icode = (fcode == IX86_BUILTIN_BEXTRI32
29479 ? CODE_FOR_tbm_bextri_si
29480 : CODE_FOR_tbm_bextri_di);
29481 if (!CONST_INT_P (op1))
29483 error ("last argument must be an immediate");
29488 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
29489 unsigned char lsb_index = INTVAL (op1) & 0xFF;
29490 op1 = GEN_INT (length);
29491 op2 = GEN_INT (lsb_index);
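/* For illustration: BEXTRI packs its control into one immediate, with
   the start bit in bits 7:0 and the field length in bits 15:8; e.g.
   0x0504 extracts five bits beginning at bit 4. It is split into two
   operands here to match the insn pattern. */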
29492 pat = GEN_FCN (icode) (target, op0, op1, op2);
29498 case IX86_BUILTIN_RDRAND16_STEP:
29499 icode = CODE_FOR_rdrandhi_1;
29503 case IX86_BUILTIN_RDRAND32_STEP:
29504 icode = CODE_FOR_rdrandsi_1;
29508 case IX86_BUILTIN_RDRAND64_STEP:
29509 icode = CODE_FOR_rdranddi_1;
29513 op0 = gen_reg_rtx (mode0);
29514 emit_insn (GEN_FCN (icode) (op0));
29516 arg0 = CALL_EXPR_ARG (exp, 0);
29517 op1 = expand_normal (arg0);
29518 if (!address_operand (op1, VOIDmode))
29520 op1 = convert_memory_address (Pmode, op1);
29521 op1 = copy_addr_to_reg (op1);
29523 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
29525 op1 = gen_reg_rtx (SImode);
29526 emit_move_insn (op1, CONST1_RTX (SImode));
29528 /* Emit SImode conditional move. */
29529 if (mode0 == HImode)
29531 op2 = gen_reg_rtx (SImode);
29532 emit_insn (gen_zero_extendhisi2 (op2, op0));
29534 else if (mode0 == SImode)
29537 op2 = gen_rtx_SUBREG (SImode, op0, 0);
29540 target = gen_reg_rtx (SImode);
29542 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
29544 emit_insn (gen_rtx_SET (VOIDmode, target,
29545 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
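/* For illustration (hypothetical user code):

     unsigned int r;
     int ok = _rdrand32_step (&r);

   In effect the conditional move built above makes 'ok' 1 when RDRAND
   succeeded (CF set) and 0 otherwise, since the insn zeroes its
   destination on failure. */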
29548 case IX86_BUILTIN_GATHERSIV2DF:
29549 icode = CODE_FOR_avx2_gathersiv2df;
29551 case IX86_BUILTIN_GATHERSIV4DF:
29552 icode = CODE_FOR_avx2_gathersiv4df;
29554 case IX86_BUILTIN_GATHERDIV2DF:
29555 icode = CODE_FOR_avx2_gatherdiv2df;
29557 case IX86_BUILTIN_GATHERDIV4DF:
29558 icode = CODE_FOR_avx2_gatherdiv4df;
29560 case IX86_BUILTIN_GATHERSIV4SF:
29561 icode = CODE_FOR_avx2_gathersiv4sf;
29563 case IX86_BUILTIN_GATHERSIV8SF:
29564 icode = CODE_FOR_avx2_gathersiv8sf;
29566 case IX86_BUILTIN_GATHERDIV4SF:
29567 icode = CODE_FOR_avx2_gatherdiv4sf;
29569 case IX86_BUILTIN_GATHERDIV8SF:
29570 icode = CODE_FOR_avx2_gatherdiv8sf;
29572 case IX86_BUILTIN_GATHERSIV2DI:
29573 icode = CODE_FOR_avx2_gathersiv2di;
29575 case IX86_BUILTIN_GATHERSIV4DI:
29576 icode = CODE_FOR_avx2_gathersiv4di;
29578 case IX86_BUILTIN_GATHERDIV2DI:
29579 icode = CODE_FOR_avx2_gatherdiv2di;
29581 case IX86_BUILTIN_GATHERDIV4DI:
29582 icode = CODE_FOR_avx2_gatherdiv4di;
29584 case IX86_BUILTIN_GATHERSIV4SI:
29585 icode = CODE_FOR_avx2_gathersiv4si;
29587 case IX86_BUILTIN_GATHERSIV8SI:
29588 icode = CODE_FOR_avx2_gathersiv8si;
29590 case IX86_BUILTIN_GATHERDIV4SI:
29591 icode = CODE_FOR_avx2_gatherdiv4si;
29593 case IX86_BUILTIN_GATHERDIV8SI:
29594 icode = CODE_FOR_avx2_gatherdiv8si;
29596 case IX86_BUILTIN_GATHERALTSIV4DF:
29597 icode = CODE_FOR_avx2_gathersiv4df;
29599 case IX86_BUILTIN_GATHERALTDIV8SF:
29600 icode = CODE_FOR_avx2_gatherdiv8sf;
29602 case IX86_BUILTIN_GATHERALTSIV4DI:
29603 icode = CODE_FOR_avx2_gathersiv4di;
29605 case IX86_BUILTIN_GATHERALTDIV8SI:
29606 icode = CODE_FOR_avx2_gatherdiv8si;
29610 arg0 = CALL_EXPR_ARG (exp, 0);
29611 arg1 = CALL_EXPR_ARG (exp, 1);
29612 arg2 = CALL_EXPR_ARG (exp, 2);
29613 arg3 = CALL_EXPR_ARG (exp, 3);
29614 arg4 = CALL_EXPR_ARG (exp, 4);
29615 op0 = expand_normal (arg0);
29616 op1 = expand_normal (arg1);
29617 op2 = expand_normal (arg2);
29618 op3 = expand_normal (arg3);
29619 op4 = expand_normal (arg4);
29620 /* Note the arg order is different from the operand order. */
29621 mode0 = insn_data[icode].operand[1].mode;
29622 mode2 = insn_data[icode].operand[3].mode;
29623 mode3 = insn_data[icode].operand[4].mode;
29624 mode4 = insn_data[icode].operand[5].mode;
29626 if (target == NULL_RTX
29627 || GET_MODE (target) != insn_data[icode].operand[0].mode)
29628 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
29630 subtarget = target;
29632 if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
29633 || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
29635 rtx half = gen_reg_rtx (V4SImode);
29636 if (!nonimmediate_operand (op2, V8SImode))
29637 op2 = copy_to_mode_reg (V8SImode, op2);
29638 emit_insn (gen_vec_extract_lo_v8si (half, op2));
29641 else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
29642 || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
29644 rtx (*gen) (rtx, rtx);
29645 rtx half = gen_reg_rtx (mode0);
29646 if (mode0 == V4SFmode)
29647 gen = gen_vec_extract_lo_v8sf;
29649 gen = gen_vec_extract_lo_v8si;
29650 if (!nonimmediate_operand (op0, GET_MODE (op0)))
29651 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
29652 emit_insn (gen (half, op0));
29654 if (!nonimmediate_operand (op3, GET_MODE (op3)))
29655 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
29656 emit_insn (gen (half, op3));
29660 /* Force memory operand only with base register here. But we
29661 don't want to do it on memory operand for other builtin
29662 functions. */
29663 if (GET_MODE (op1) != Pmode)
29664 op1 = convert_to_mode (Pmode, op1, 1);
29665 op1 = force_reg (Pmode, op1);
29667 if (!insn_data[icode].operand[1].predicate (op0, mode0))
29668 op0 = copy_to_mode_reg (mode0, op0);
29669 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
29670 op1 = copy_to_mode_reg (Pmode, op1);
29671 if (!insn_data[icode].operand[3].predicate (op2, mode2))
29672 op2 = copy_to_mode_reg (mode2, op2);
29673 if (!insn_data[icode].operand[4].predicate (op3, mode3))
29674 op3 = copy_to_mode_reg (mode3, op3);
29675 if (!insn_data[icode].operand[5].predicate (op4, mode4))
29677 error ("last argument must be scale 1, 2, 4, 8");
29681 /* Optimize. If mask is known to have all high bits set,
29682 replace op0 with pc_rtx to signal that the instruction
29683 overwrites the whole destination and doesn't use its
29684 previous contents. */
29687 if (TREE_CODE (arg3) == VECTOR_CST)
29690 unsigned int negative = 0;
29691 for (elt = TREE_VECTOR_CST_ELTS (arg3);
29692 elt; elt = TREE_CHAIN (elt))
29694 tree cst = TREE_VALUE (elt);
29695 if (TREE_CODE (cst) == INTEGER_CST
29696 && tree_int_cst_sign_bit (cst))
29698 else if (TREE_CODE (cst) == REAL_CST
29699 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
29702 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
29705 else if (TREE_CODE (arg3) == SSA_NAME)
29707 /* Recognize also when mask is like:
29708 __v2df src = _mm_setzero_pd ();
29709 __v2df mask = _mm_cmpeq_pd (src, src);
29711 __v8sf src = _mm256_setzero_ps ();
29712 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
29713 as that is a cheaper way to load all ones into
29714 a register than having to load a constant from
29715 memory. */
29716 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
29717 if (is_gimple_call (def_stmt))
29719 tree fndecl = gimple_call_fndecl (def_stmt);
29721 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
29722 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
29724 case IX86_BUILTIN_CMPPD:
29725 case IX86_BUILTIN_CMPPS:
29726 case IX86_BUILTIN_CMPPD256:
29727 case IX86_BUILTIN_CMPPS256:
29728 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
29731 case IX86_BUILTIN_CMPEQPD:
29732 case IX86_BUILTIN_CMPEQPS:
29733 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
29734 && initializer_zerop (gimple_call_arg (def_stmt,
29745 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
29750 if (fcode == IX86_BUILTIN_GATHERDIV8SF
29751 || fcode == IX86_BUILTIN_GATHERDIV8SI)
29753 enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
29754 ? V4SFmode : V4SImode;
29755 if (target == NULL_RTX)
29756 target = gen_reg_rtx (tmode);
29757 if (tmode == V4SFmode)
29758 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
29760 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
29763 target = subtarget;
29771 for (i = 0, d = bdesc_special_args;
29772 i < ARRAY_SIZE (bdesc_special_args);
29774 if (d->code == fcode)
29775 return ix86_expand_special_args_builtin (d, exp, target);
29777 for (i = 0, d = bdesc_args;
29778 i < ARRAY_SIZE (bdesc_args);
29780 if (d->code == fcode)
29783 case IX86_BUILTIN_FABSQ:
29784 case IX86_BUILTIN_COPYSIGNQ:
29786 /* Emit a normal call if SSE2 isn't available. */
29787 return expand_call (exp, target, ignore);
29789 return ix86_expand_args_builtin (d, exp, target);
29792 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
29793 if (d->code == fcode)
29794 return ix86_expand_sse_comi (d, exp, target);
29796 for (i = 0, d = bdesc_pcmpestr;
29797 i < ARRAY_SIZE (bdesc_pcmpestr);
29799 if (d->code == fcode)
29800 return ix86_expand_sse_pcmpestr (d, exp, target);
29802 for (i = 0, d = bdesc_pcmpistr;
29803 i < ARRAY_SIZE (bdesc_pcmpistr);
29805 if (d->code == fcode)
29806 return ix86_expand_sse_pcmpistr (d, exp, target);
29808 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
29809 if (d->code == fcode)
29810 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
29811 (enum ix86_builtin_func_type)
29812 d->flag, d->comparison);
29814 gcc_unreachable ();
29817 /* Returns a function decl for a vectorized version of the builtin
29818 function FNDECL, with result vector type TYPE_OUT and argument
29819 vector type TYPE_IN, or NULL_TREE if it is not available. */
29822 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
29823 tree type_in)
29825 enum machine_mode in_mode, out_mode;
29827 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29829 if (TREE_CODE (type_out) != VECTOR_TYPE
29830 || TREE_CODE (type_in) != VECTOR_TYPE
29831 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
29834 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29835 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29836 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29837 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29841 case BUILT_IN_SQRT:
29842 if (out_mode == DFmode && in_mode == DFmode)
29844 if (out_n == 2 && in_n == 2)
29845 return ix86_builtins[IX86_BUILTIN_SQRTPD];
29846 else if (out_n == 4 && in_n == 4)
29847 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
29851 case BUILT_IN_SQRTF:
29852 if (out_mode == SFmode && in_mode == SFmode)
29854 if (out_n == 4 && in_n == 4)
29855 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
29856 else if (out_n == 8 && in_n == 8)
29857 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
29861 case BUILT_IN_IFLOOR:
29862 case BUILT_IN_LFLOOR:
29863 case BUILT_IN_LLFLOOR:
29864 /* The round insn does not trap on denormals. */
29865 if (flag_trapping_math || !TARGET_ROUND)
29868 if (out_mode == SImode && in_mode == DFmode)
29870 if (out_n == 4 && in_n == 2)
29871 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
29872 else if (out_n == 8 && in_n == 4)
29873 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
29877 case BUILT_IN_IFLOORF:
29878 case BUILT_IN_LFLOORF:
29879 case BUILT_IN_LLFLOORF:
29880 /* The round insn does not trap on denormals. */
29881 if (flag_trapping_math || !TARGET_ROUND)
29884 if (out_mode == SImode && in_mode == SFmode)
29886 if (out_n == 4 && in_n == 4)
29887 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
29888 else if (out_n == 8 && in_n == 8)
29889 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
29893 case BUILT_IN_ICEIL:
29894 case BUILT_IN_LCEIL:
29895 case BUILT_IN_LLCEIL:
29896 /* The round insn does not trap on denormals. */
29897 if (flag_trapping_math || !TARGET_ROUND)
29900 if (out_mode == SImode && in_mode == DFmode)
29902 if (out_n == 4 && in_n == 2)
29903 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
29904 else if (out_n == 8 && in_n == 4)
29905 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
29909 case BUILT_IN_ICEILF:
29910 case BUILT_IN_LCEILF:
29911 case BUILT_IN_LLCEILF:
29912 /* The round insn does not trap on denormals. */
29913 if (flag_trapping_math || !TARGET_ROUND)
29916 if (out_mode == SImode && in_mode == SFmode)
29918 if (out_n == 4 && in_n == 4)
29919 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
29920 else if (out_n == 8 && in_n == 8)
29921 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
29925 case BUILT_IN_IRINT:
29926 case BUILT_IN_LRINT:
29927 case BUILT_IN_LLRINT:
29928 if (out_mode == SImode && in_mode == DFmode)
29930 if (out_n == 4 && in_n == 2)
29931 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
29932 else if (out_n == 8 && in_n == 4)
29933 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
29937 case BUILT_IN_IRINTF:
29938 case BUILT_IN_LRINTF:
29939 case BUILT_IN_LLRINTF:
29940 if (out_mode == SImode && in_mode == SFmode)
29942 if (out_n == 4 && in_n == 4)
29943 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
29944 else if (out_n == 8 && in_n == 8)
29945 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
29949 case BUILT_IN_IROUND:
29950 case BUILT_IN_LROUND:
29951 case BUILT_IN_LLROUND:
29952 /* The round insn does not trap on denormals. */
29953 if (flag_trapping_math || !TARGET_ROUND)
29956 if (out_mode == SImode && in_mode == DFmode)
29958 if (out_n == 4 && in_n == 2)
29959 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
29960 else if (out_n == 8 && in_n == 4)
29961 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
29965 case BUILT_IN_IROUNDF:
29966 case BUILT_IN_LROUNDF:
29967 case BUILT_IN_LLROUNDF:
29968 /* The round insn does not trap on denormals. */
29969 if (flag_trapping_math || !TARGET_ROUND)
29972 if (out_mode == SImode && in_mode == SFmode)
29974 if (out_n == 4 && in_n == 4)
29975 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
29976 else if (out_n == 8 && in_n == 8)
29977 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
29981 case BUILT_IN_COPYSIGN:
29982 if (out_mode == DFmode && in_mode == DFmode)
29984 if (out_n == 2 && in_n == 2)
29985 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
29986 else if (out_n == 4 && in_n == 4)
29987 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
29991 case BUILT_IN_COPYSIGNF:
29992 if (out_mode == SFmode && in_mode == SFmode)
29994 if (out_n == 4 && in_n == 4)
29995 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
29996 else if (out_n == 8 && in_n == 8)
29997 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
30001 case BUILT_IN_FLOOR:
30002 /* The round insn does not trap on denormals. */
30003 if (flag_trapping_math || !TARGET_ROUND)
30006 if (out_mode == DFmode && in_mode == DFmode)
30008 if (out_n == 2 && in_n == 2)
30009 return ix86_builtins[IX86_BUILTIN_FLOORPD];
30010 else if (out_n == 4 && in_n == 4)
30011 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
30015 case BUILT_IN_FLOORF:
30016 /* The round insn does not trap on denormals. */
30017 if (flag_trapping_math || !TARGET_ROUND)
30020 if (out_mode == SFmode && in_mode == SFmode)
30022 if (out_n == 4 && in_n == 4)
30023 return ix86_builtins[IX86_BUILTIN_FLOORPS];
30024 else if (out_n == 8 && in_n == 8)
30025 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
30029 case BUILT_IN_CEIL:
30030 /* The round insn does not trap on denormals. */
30031 if (flag_trapping_math || !TARGET_ROUND)
30034 if (out_mode == DFmode && in_mode == DFmode)
30036 if (out_n == 2 && in_n == 2)
30037 return ix86_builtins[IX86_BUILTIN_CEILPD];
30038 else if (out_n == 4 && in_n == 4)
30039 return ix86_builtins[IX86_BUILTIN_CEILPD256];
30043 case BUILT_IN_CEILF:
30044 /* The round insn does not trap on denormals. */
30045 if (flag_trapping_math || !TARGET_ROUND)
30048 if (out_mode == SFmode && in_mode == SFmode)
30050 if (out_n == 4 && in_n == 4)
30051 return ix86_builtins[IX86_BUILTIN_CEILPS];
30052 else if (out_n == 8 && in_n == 8)
30053 return ix86_builtins[IX86_BUILTIN_CEILPS256];
30057 case BUILT_IN_TRUNC:
30058 /* The round insn does not trap on denormals. */
30059 if (flag_trapping_math || !TARGET_ROUND)
30062 if (out_mode == DFmode && in_mode == DFmode)
30064 if (out_n == 2 && in_n == 2)
30065 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
30066 else if (out_n == 4 && in_n == 4)
30067 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
30071 case BUILT_IN_TRUNCF:
30072 /* The round insn does not trap on denormals. */
30073 if (flag_trapping_math || !TARGET_ROUND)
30076 if (out_mode == SFmode && in_mode == SFmode)
30078 if (out_n == 4 && in_n == 4)
30079 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
30080 else if (out_n == 8 && in_n == 8)
30081 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
30085 case BUILT_IN_RINT:
30086 /* The round insn does not trap on denormals. */
30087 if (flag_trapping_math || !TARGET_ROUND)
30090 if (out_mode == DFmode && in_mode == DFmode)
30092 if (out_n == 2 && in_n == 2)
30093 return ix86_builtins[IX86_BUILTIN_RINTPD];
30094 else if (out_n == 4 && in_n == 4)
30095 return ix86_builtins[IX86_BUILTIN_RINTPD256];
30099 case BUILT_IN_RINTF:
30100 /* The round insn does not trap on denormals. */
30101 if (flag_trapping_math || !TARGET_ROUND)
30104 if (out_mode == SFmode && in_mode == SFmode)
30106 if (out_n == 4 && in_n == 4)
30107 return ix86_builtins[IX86_BUILTIN_RINTPS];
30108 else if (out_n == 8 && in_n == 8)
30109 return ix86_builtins[IX86_BUILTIN_RINTPS256];
30113 case BUILT_IN_ROUND:
30114 /* The round insn does not trap on denormals. */
30115 if (flag_trapping_math || !TARGET_ROUND)
30118 if (out_mode == DFmode && in_mode == DFmode)
30120 if (out_n == 2 && in_n == 2)
30121 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
30122 else if (out_n == 4 && in_n == 4)
30123 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
30127 case BUILT_IN_ROUNDF:
30128 /* The round insn does not trap on denormals. */
30129 if (flag_trapping_math || !TARGET_ROUND)
30132 if (out_mode == SFmode && in_mode == SFmode)
30134 if (out_n == 4 && in_n == 4)
30135 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
30136 else if (out_n == 8 && in_n == 8)
30137 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
30142 if (out_mode == DFmode && in_mode == DFmode)
30144 if (out_n == 2 && in_n == 2)
30145 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
30146 if (out_n == 4 && in_n == 4)
30147 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
30151 case BUILT_IN_FMAF:
30152 if (out_mode == SFmode && in_mode == SFmode)
30154 if (out_n == 4 && in_n == 4)
30155 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
30156 if (out_n == 8 && in_n == 8)
30157 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
30165 /* Dispatch to a handler for a vectorization library. */
30166 if (ix86_veclib_handler)
30167 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
30173 /* Handler for an SVML-style interface to
30174 a library with vectorized intrinsics. */
30177 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
30180 tree fntype, new_fndecl, args;
30183 enum machine_mode el_mode, in_mode;
30186 /* The SVML is suitable for unsafe math only. */
30187 if (!flag_unsafe_math_optimizations)
30190 el_mode = TYPE_MODE (TREE_TYPE (type_out));
30191 n = TYPE_VECTOR_SUBPARTS (type_out);
30192 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30193 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30194 if (el_mode != in_mode
30202 case BUILT_IN_LOG10:
30204 case BUILT_IN_TANH:
30206 case BUILT_IN_ATAN:
30207 case BUILT_IN_ATAN2:
30208 case BUILT_IN_ATANH:
30209 case BUILT_IN_CBRT:
30210 case BUILT_IN_SINH:
30212 case BUILT_IN_ASINH:
30213 case BUILT_IN_ASIN:
30214 case BUILT_IN_COSH:
30216 case BUILT_IN_ACOSH:
30217 case BUILT_IN_ACOS:
30218 if (el_mode != DFmode || n != 2)
30222 case BUILT_IN_EXPF:
30223 case BUILT_IN_LOGF:
30224 case BUILT_IN_LOG10F:
30225 case BUILT_IN_POWF:
30226 case BUILT_IN_TANHF:
30227 case BUILT_IN_TANF:
30228 case BUILT_IN_ATANF:
30229 case BUILT_IN_ATAN2F:
30230 case BUILT_IN_ATANHF:
30231 case BUILT_IN_CBRTF:
30232 case BUILT_IN_SINHF:
30233 case BUILT_IN_SINF:
30234 case BUILT_IN_ASINHF:
30235 case BUILT_IN_ASINF:
30236 case BUILT_IN_COSHF:
30237 case BUILT_IN_COSF:
30238 case BUILT_IN_ACOSHF:
30239 case BUILT_IN_ACOSF:
30240 if (el_mode != SFmode || n != 4)
30248 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
30250 if (fn == BUILT_IN_LOGF)
30251 strcpy (name, "vmlsLn4");
30252 else if (fn == BUILT_IN_LOG)
30253 strcpy (name, "vmldLn2");
30256 sprintf (name, "vmls%s", bname+10);
30257 name[strlen (name)-1] = '4';
30260 sprintf (name, "vmld%s2", bname+10);
30262 /* Convert to uppercase. */
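/* For illustration: with this mangling BUILT_IN_SINF ("__builtin_sinf")
   becomes "vmlsSin4" and BUILT_IN_SIN becomes "vmldSin2", the SVML
   entry points for 4 packed floats and 2 packed doubles. */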
30266 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
30268 args = TREE_CHAIN (args))
30272 fntype = build_function_type_list (type_out, type_in, NULL);
30274 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
30276 /* Build a function declaration for the vectorized function. */
30277 new_fndecl = build_decl (BUILTINS_LOCATION,
30278 FUNCTION_DECL, get_identifier (name), fntype);
30279 TREE_PUBLIC (new_fndecl) = 1;
30280 DECL_EXTERNAL (new_fndecl) = 1;
30281 DECL_IS_NOVOPS (new_fndecl) = 1;
30282 TREE_READONLY (new_fndecl) = 1;
30287 /* Handler for an ACML-style interface to
30288 a library with vectorized intrinsics. */
30291 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
30293 char name[20] = "__vr.._";
30294 tree fntype, new_fndecl, args;
30297 enum machine_mode el_mode, in_mode;
30300 /* The ACML is 64-bit only and suitable for unsafe math only, as
30301 it does not correctly support parts of IEEE with the required
30302 precision, such as denormals. */
30304 || !flag_unsafe_math_optimizations)
30307 el_mode = TYPE_MODE (TREE_TYPE (type_out));
30308 n = TYPE_VECTOR_SUBPARTS (type_out);
30309 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30310 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30311 if (el_mode != in_mode
30321 case BUILT_IN_LOG2:
30322 case BUILT_IN_LOG10:
30325 if (el_mode != DFmode
30330 case BUILT_IN_SINF:
30331 case BUILT_IN_COSF:
30332 case BUILT_IN_EXPF:
30333 case BUILT_IN_POWF:
30334 case BUILT_IN_LOGF:
30335 case BUILT_IN_LOG2F:
30336 case BUILT_IN_LOG10F:
30339 if (el_mode != SFmode
30348 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
30349 sprintf (name + 7, "%s", bname+10);
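/* For illustration: the mangling yields "__vrd2_sin" for BUILT_IN_SIN
   on 2 doubles and "__vrs4_sinf" for BUILT_IN_SINF on 4 floats, the
   ACML vector entry points. */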
30352 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
30354 args = TREE_CHAIN (args))
30358 fntype = build_function_type_list (type_out, type_in, NULL);
30360 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
30362 /* Build a function declaration for the vectorized function. */
30363 new_fndecl = build_decl (BUILTINS_LOCATION,
30364 FUNCTION_DECL, get_identifier (name), fntype);
30365 TREE_PUBLIC (new_fndecl) = 1;
30366 DECL_EXTERNAL (new_fndecl) = 1;
30367 DECL_IS_NOVOPS (new_fndecl) = 1;
30368 TREE_READONLY (new_fndecl) = 1;
30373 /* Returns a decl of a function that implements gather load with
30374 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
30375 Return NULL_TREE if it is not available. */
30378 ix86_vectorize_builtin_gather (const_tree mem_vectype,
30379 const_tree index_type, int scale)
30382 enum ix86_builtins code;
30387 if ((TREE_CODE (index_type) != INTEGER_TYPE
30388 && !POINTER_TYPE_P (index_type))
30389 || (TYPE_MODE (index_type) != SImode
30390 && TYPE_MODE (index_type) != DImode))
30393 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
30396 /* The v*gather* insns sign-extend the index to pointer mode. */
30397 if (TYPE_PRECISION (index_type) < POINTER_SIZE
30398 && TYPE_UNSIGNED (index_type))
30403 || (scale & (scale - 1)) != 0)
30406 si = TYPE_MODE (index_type) == SImode;
30407 switch (TYPE_MODE (mem_vectype))
30410 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
30413 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
30416 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
30419 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
30422 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
30425 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
30428 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
30431 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
30437 return ix86_builtins[code];
30440 /* Returns a decl for a target-specific builtin that implements the
30441 reciprocal of the function, or NULL_TREE if not available. */
30444 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
30445 bool sqrt ATTRIBUTE_UNUSED)
30447 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
30448 && flag_finite_math_only && !flag_trapping_math
30449 && flag_unsafe_math_optimizations))
30453 /* Machine dependent builtins. */
30456 /* Vectorized version of sqrt to rsqrt conversion. */
30457 case IX86_BUILTIN_SQRTPS_NR:
30458 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
30460 case IX86_BUILTIN_SQRTPS_NR256:
30461 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
30467 /* Normal builtins. */
30470 /* Sqrt to rsqrt conversion. */
30471 case BUILT_IN_SQRTF:
30472 return ix86_builtins[IX86_BUILTIN_RSQRTF];
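/* For illustration (hypothetical user code): with the flags tested
   above (e.g. via -ffast-math), an expression such as

     float r = 1.0f / sqrtf (x);

   can be expanded with RSQRTSS/RSQRTPS plus a Newton-Raphson
   refinement step instead of an exact square root and division. */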
30479 /* Helper for avx_vpermilps256_operand et al. This is also used by
30480 the expansion functions to turn the parallel back into a mask.
30481 The return value is 0 for no match and the imm8+1 for a match. */
30484 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
30486 unsigned i, nelt = GET_MODE_NUNITS (mode);
30488 unsigned char ipar[8];
30490 if (XVECLEN (par, 0) != (int) nelt)
30493 /* Validate that all of the elements are constants, and not totally
30494 out of range. Copy the data into an integral array to make the
30495 subsequent checks easier. */
30496 for (i = 0; i < nelt; ++i)
30498 rtx er = XVECEXP (par, 0, i);
30499 unsigned HOST_WIDE_INT ei;
30501 if (!CONST_INT_P (er))
30512 /* In the 256-bit DFmode case, we can only move elements within
30513 a 128-bit lane. */
30514 for (i = 0; i < 2; ++i)
30518 mask |= ipar[i] << i;
30520 for (i = 2; i < 4; ++i)
30524 mask |= (ipar[i] - 2) << i;
30529 /* In the 256-bit SFmode case, we have full freedom of movement
30530 within the low 128-bit lane, but the high 128-bit lane must
30531 mirror the exact same pattern. */
30532 for (i = 0; i < 4; ++i)
30533 if (ipar[i] + 4 != ipar[i + 4])
30540 /* In the 128-bit case, we have full freedom in the placement of
30541 the elements from the source operand. */
30542 for (i = 0; i < nelt; ++i)
30543 mask |= ipar[i] << (i * (nelt / 2));
30547 gcc_unreachable ();
30550 /* Make sure success has a non-zero value by adding one. */
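/* For illustration: in the V4SF case, (parallel [1 0 3 2]) produces
   mask 0xb1, the vpermilps immediate, and the function returns
   0xb1 + 1 = 0xb2. */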
30554 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
30555 the expansion functions to turn the parallel back into a mask.
30556 The return value is 0 for no match and the imm8+1 for a match. */
30559 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
30561 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
30563 unsigned char ipar[8];
30565 if (XVECLEN (par, 0) != (int) nelt)
30568 /* Validate that all of the elements are constants, and not totally
30569 out of range. Copy the data into an integral array to make the
30570 subsequent checks easier. */
30571 for (i = 0; i < nelt; ++i)
30573 rtx er = XVECEXP (par, 0, i);
30574 unsigned HOST_WIDE_INT ei;
30576 if (!CONST_INT_P (er))
30579 if (ei >= 2 * nelt)
30584 /* Validate that each half of the permute selects a contiguous half. */
30585 for (i = 0; i < nelt2 - 1; ++i)
30586 if (ipar[i] + 1 != ipar[i + 1])
30588 for (i = nelt2; i < nelt - 1; ++i)
30589 if (ipar[i] + 1 != ipar[i + 1])
30592 /* Reconstruct the mask. */
30593 for (i = 0; i < 2; ++i)
30595 unsigned e = ipar[i * nelt2];
30599 mask |= e << (i * 4);
30602 /* Make sure success has a non-zero value by adding one. */
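/* The forward encoding, as a sketch: vperm2f128 selects each 128-bit
   half of the destination from one of four source halves (0 and 1
   name the halves of the first operand, 2 and 3 those of the second),
   and the reconstruction above shifts each selector into its own
   nibble.  Hypothetical helper:  */
static unsigned int
example_vperm2f128_imm8 (unsigned int lo_sel, unsigned int hi_sel)
{
  return (lo_sel & 3) | ((hi_sel & 3) << 4);
}
/* lo_sel == 2, hi_sel == 1 gives 0x12: the low result half comes from
   the low half of operand 2, the high result half from the high half
   of operand 1.  */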
30606 /* Store OPERAND to the memory after reload is completed. This means
30607 that we can't easily use assign_stack_local. */
30609 ix86_force_to_memory (enum machine_mode mode, rtx operand)
30613 gcc_assert (reload_completed);
30614 if (ix86_using_red_zone ())
30616 result = gen_rtx_MEM (mode,
30617 gen_rtx_PLUS (Pmode,
30619 GEN_INT (-RED_ZONE_SIZE)));
30620 emit_move_insn (result, operand);
30622 else if (TARGET_64BIT)
30628 operand = gen_lowpart (DImode, operand);
30632 gen_rtx_SET (VOIDmode,
30633 gen_rtx_MEM (DImode,
30634 gen_rtx_PRE_DEC (DImode,
30635 stack_pointer_rtx)),
30639 gcc_unreachable ();
30641 result = gen_rtx_MEM (mode, stack_pointer_rtx);
30650 split_double_mode (mode, &operand, 1, operands, operands + 1);
30652 gen_rtx_SET (VOIDmode,
30653 gen_rtx_MEM (SImode,
30654 gen_rtx_PRE_DEC (Pmode,
30655 stack_pointer_rtx)),
30658 gen_rtx_SET (VOIDmode,
30659 gen_rtx_MEM (SImode,
30660 gen_rtx_PRE_DEC (Pmode,
30661 stack_pointer_rtx)),
30666 /* Store HImodes as SImodes. */
30667 operand = gen_lowpart (SImode, operand);
30671 gen_rtx_SET (VOIDmode,
30672 gen_rtx_MEM (GET_MODE (operand),
30673 gen_rtx_PRE_DEC (SImode,
30674 stack_pointer_rtx)),
30678 gcc_unreachable ();
30680 result = gen_rtx_MEM (mode, stack_pointer_rtx);
30685 /* Free the operand from memory. */
30687 ix86_free_from_memory (enum machine_mode mode)
30689 if (!ix86_using_red_zone ())
30693 if (mode == DImode || TARGET_64BIT)
30697 /* Use LEA to deallocate stack space. In peephole2 it will be converted
30698 to a pop or add instruction if registers are available. */
30699 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
30700 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
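/* A sketch of the size contract between ix86_force_to_memory and
   ix86_free_from_memory above (hypothetical helper; it restates the
   visible "mode == DImode || TARGET_64BIT" test): narrow values are
   widened to at least SImode before being pushed, so both functions
   must agree on the slot size to keep the stack pointer balanced.  */
static int
example_spill_slot_size (int mode_is_dimode, int target_64bit)
{
  if (mode_is_dimode || target_64bit)
    return 8;   /* DImode, or any push in 64-bit mode.  */
  return 4;     /* QImode and HImode are stored as SImode.  */
}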
30705 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
30707 Put float CONST_DOUBLE in the constant pool instead of fp regs.
30708 QImode must go into class Q_REGS.
30709 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
30710 movdf to do mem-to-mem moves through integer regs. */
30713 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
30715 enum machine_mode mode = GET_MODE (x);
30717 /* We're only allowed to return a subclass of CLASS. Many of the
30718 following checks fail for NO_REGS, so eliminate that early. */
30719 if (regclass == NO_REGS)
30722 /* All classes can load zeros. */
30723 if (x == CONST0_RTX (mode))
30726 /* Force constants into memory if we are loading a (nonzero) constant into
30727 an MMX or SSE register. This is because there are no MMX/SSE instructions
30728 to load from a constant. */
30730 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
30733 /* Prefer SSE regs only, if we can use them for math. */
30734 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
30735 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
30737 /* Floating-point constants need more complex checks. */
30738 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
30740 /* General regs can load everything. */
30741 if (reg_class_subset_p (regclass, GENERAL_REGS))
30744 /* Floats can load 0 and 1 plus some others. Note that we eliminated
30745 zero above. We only want to wind up preferring 80387 registers if
30746 we plan on doing computation with them. */
30748 && standard_80387_constant_p (x) > 0)
30750 /* Limit class to non-sse. */
30751 if (regclass == FLOAT_SSE_REGS)
30753 if (regclass == FP_TOP_SSE_REGS)
30755 if (regclass == FP_SECOND_SSE_REGS)
30756 return FP_SECOND_REG;
30757 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
30764 /* Generally when we see PLUS here, it's the function invariant
30765 (plus soft-fp const_int), which can only be computed into general registers. */
30767 if (GET_CODE (x) == PLUS)
30768 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
30770 /* QImode constants are easy to load, but non-constant QImode data
30771 must go into Q_REGS. */
30772 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
30774 if (reg_class_subset_p (regclass, Q_REGS))
30776 if (reg_class_subset_p (Q_REGS, regclass))
30784 /* Discourage putting floating-point values in SSE registers unless
30785 SSE math is being used, and likewise for the 387 registers. */
30787 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
30789 enum machine_mode mode = GET_MODE (x);
30791 /* Restrict the output reload class to the register bank that we are doing
30792 math on. If we would like not to return a subset of CLASS, reject this
30793 alternative: if reload cannot do this, it will still use its choice. */
30795 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
30796 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
30798 if (X87_FLOAT_MODE_P (mode))
30800 if (regclass == FP_TOP_SSE_REGS)
30802 else if (regclass == FP_SECOND_SSE_REGS)
30803 return FP_SECOND_REG;
30805 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
30812 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
30813 enum machine_mode mode, secondary_reload_info *sri)
30815 /* Double-word spills from general registers to non-offsettable memory
30816 references (zero-extended addresses) require special handling. */
30819 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
30820 && rclass == GENERAL_REGS
30821 && !offsettable_memref_p (x))
30824 ? CODE_FOR_reload_noff_load
30825 : CODE_FOR_reload_noff_store);
30826 /* Add the cost of moving address to a temporary. */
30827 sri->extra_cost = 1;
30832 /* QImode spills from non-QI registers require an
30833 intermediate register on 32bit targets. */
30835 && !in_p && mode == QImode
30836 && (rclass == GENERAL_REGS
30837 || rclass == LEGACY_REGS
30838 || rclass == INDEX_REGS))
30847 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
30848 regno = true_regnum (x);
30850 /* Return Q_REGS if the operand is in memory. */
30855 /* This condition handles the corner case where an expression involving
30856 pointers gets vectorized. We're trying to use the address of a
30857 stack slot as a vector initializer.
30859 (set (reg:V2DI 74 [ vect_cst_.2 ])
30860 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
30862 Eventually frame gets turned into sp+offset like this:
30864 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30865 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
30866 (const_int 392 [0x188]))))
30868 That later gets turned into:
30870 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30871 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
30872 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
30874 We'll have the following reload recorded:
30876 Reload 0: reload_in (DI) =
30877 (plus:DI (reg/f:DI 7 sp)
30878 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
30879 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30880 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
30881 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
30882 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30883 reload_reg_rtx: (reg:V2DI 22 xmm1)
30885 Which isn't going to work since SSE instructions can't handle scalar
30886 additions. Returning GENERAL_REGS forces the addition into an integer
30887 register, and reload can handle subsequent reloads without problems. */
30889 if (in_p && GET_CODE (x) == PLUS
30890 && SSE_CLASS_P (rclass)
30891 && SCALAR_INT_MODE_P (mode))
30892 return GENERAL_REGS;
30897 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
30900 ix86_class_likely_spilled_p (reg_class_t rclass)
30911 case SSE_FIRST_REG:
30913 case FP_SECOND_REG:
30923 /* If we are copying between general and FP registers, we need a memory
30924 location. The same is true for SSE and MMX registers.
30926 To optimize register_move_cost performance, allow inline variant.
30928 The macro can't work reliably when one of the CLASSES is a class containing
30929 registers from multiple units (SSE, MMX, integer). We avoid this by never
30930 combining those units in a single alternative in the machine description.
30931 Ensure that this constraint holds to avoid unexpected surprises.
30933 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
30934 enforce these sanity checks. */
30937 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
30938 enum machine_mode mode, int strict)
30940 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
30941 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
30942 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
30943 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
30944 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
30945 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
30947 gcc_assert (!strict);
30951 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
30954 /* ??? This is a lie. We do have moves between mmx/general, and for
30955 mmx/sse2. But by saying we need secondary memory we discourage the
30956 register allocator from using the mmx registers unless needed. */
30957 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
30960 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
30962 /* SSE1 doesn't have any direct moves from other classes. */
30966 /* If the target says that inter-unit moves are more expensive
30967 than moving through memory, then don't generate them. */
30968 if (!TARGET_INTER_UNIT_MOVES)
30971 /* Between SSE and general, we have moves no larger than word size. */
30972 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
30980 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
30981 enum machine_mode mode, int strict)
30983 return inline_secondary_memory_needed (class1, class2, mode, strict);
30986 /* Implement the TARGET_CLASS_MAX_NREGS hook.
30988 On the 80386, this is the size of MODE in words,
30989 except in the FP regs, where a single reg is always enough. */
30991 static unsigned char
30992 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
30994 if (MAYBE_INTEGER_CLASS_P (rclass))
30996 if (mode == XFmode)
30997 return (TARGET_64BIT ? 2 : 3);
30998 else if (mode == XCmode)
30999 return (TARGET_64BIT ? 4 : 6);
31001 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
31005 if (COMPLEX_MODE_P (mode))
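/* The word count above is plain ceiling division.  A self-contained
   sketch (hypothetical helper):  */
static unsigned int
example_words_for_size (unsigned int mode_size, unsigned int units_per_word)
{
  return (mode_size + units_per_word - 1) / units_per_word;
}
/* With the 32-bit UNITS_PER_WORD == 4, example_words_for_size (8, 4)
   == 2 for DImode.  XFmode and XCmode are special-cased above because
   their in-memory size may include padding that would over-count.  */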
31012 /* Return true if the registers in CLASS cannot represent the change from
31013 modes FROM to TO. */
31016 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
31017 enum reg_class regclass)
31022 /* x87 registers can't do subreg at all, as all values are reformatted
31023 to extended precision. */
31024 if (MAYBE_FLOAT_CLASS_P (regclass))
31027 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
31029 /* Vector registers do not support QI or HImode loads. If we don't
31030 disallow a change to these modes, reload will assume it's ok to
31031 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
31032 the vec_dupv4hi pattern. */
31033 if (GET_MODE_SIZE (from) < 4)
31036 /* Vector registers do not support subreg with nonzero offsets, which
31037 are otherwise valid for integer registers. Since we can't see
31038 whether we have a nonzero offset from here, prohibit all
31039 nonparadoxical subregs changing size. */
31040 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
31047 /* Return the cost of moving data of mode M between a
31048 register and memory. A value of 2 is the default; this cost is
31049 relative to those in `REGISTER_MOVE_COST'.
31051 This function is used extensively by register_move_cost, which is used
31052 to build tables at startup. Make it inline in this case.
31053 When IN is 2, return the maximum of the in and out move costs.
31055 If moving between registers and memory is more expensive than
31056 between two registers, you should define this macro to express the relative cost.
31059 We also model the increased cost of moving QImode registers in non-Q_REGS classes. */
31063 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
31067 if (FLOAT_CLASS_P (regclass))
31085 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
31086 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
31088 if (SSE_CLASS_P (regclass))
31091 switch (GET_MODE_SIZE (mode))
31106 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
31107 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
31109 if (MMX_CLASS_P (regclass))
31112 switch (GET_MODE_SIZE (mode))
31124 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
31125 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
31127 switch (GET_MODE_SIZE (mode))
31130 if (Q_CLASS_P (regclass) || TARGET_64BIT)
31133 return ix86_cost->int_store[0];
31134 if (TARGET_PARTIAL_REG_DEPENDENCY
31135 && optimize_function_for_speed_p (cfun))
31136 cost = ix86_cost->movzbl_load;
31138 cost = ix86_cost->int_load[0];
31140 return MAX (cost, ix86_cost->int_store[0]);
31146 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
31148 return ix86_cost->movzbl_load;
31150 return ix86_cost->int_store[0] + 4;
31155 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
31156 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
31158 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
31159 if (mode == TFmode)
31162 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
31164 cost = ix86_cost->int_load[2];
31166 cost = ix86_cost->int_store[2];
31167 return (cost * (((int) GET_MODE_SIZE (mode)
31168 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
31173 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
31176 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
31180 /* Return the cost of moving data from a register in class CLASS1 to
31181 one in class CLASS2.
31183 It is not required that the cost always equal 2 when FROM is the same as TO;
31184 on some machines it is expensive to move between registers if they are not
31185 general registers. */
31188 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
31189 reg_class_t class2_i)
31191 enum reg_class class1 = (enum reg_class) class1_i;
31192 enum reg_class class2 = (enum reg_class) class2_i;
31194 /* In case we require secondary memory, compute the cost of the store followed
31195 by the load. In order to avoid bad register allocation choices, we need
31196 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
31198 if (inline_secondary_memory_needed (class1, class2, mode, 0))
31202 cost += inline_memory_move_cost (mode, class1, 2);
31203 cost += inline_memory_move_cost (mode, class2, 2);
31205 /* In case of copying from a general-purpose register we may emit multiple
31206 stores followed by a single load, causing a memory size mismatch stall.
31207 Count this as an arbitrarily high cost of 20. */
31208 if (targetm.class_max_nregs (class1, mode)
31209 > targetm.class_max_nregs (class2, mode))
31212 /* In the case of FP/MMX moves, the registers actually overlap, and we
31213 have to switch modes in order to treat them differently. */
31214 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
31215 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
31221 /* Moves between SSE/MMX and integer unit are expensive. */
31222 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
31223 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
31225 /* ??? By keeping the returned value relatively high, we limit the number
31226 of moves between integer and MMX/SSE registers for all targets.
31227 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
31228 where integer modes in MMX/SSE registers are not tieable
31229 because of missing QImode and HImode moves to, from or between
31230 MMX/SSE registers. */
31231 return MAX (8, ix86_cost->mmxsse_to_integer);
31233 if (MAYBE_FLOAT_CLASS_P (class1))
31234 return ix86_cost->fp_move;
31235 if (MAYBE_SSE_CLASS_P (class1))
31236 return ix86_cost->sse_move;
31237 if (MAYBE_MMX_CLASS_P (class1))
31238 return ix86_cost->mmx_move;
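/* A sketch of the secondary-memory pricing above (hypothetical
   helper): the copy is charged as a store plus a load, with a flat
   penalty of 20 when the source class occupies more hard registers
   than the destination, since several narrow stores feeding one wide
   load can cause a store-forwarding stall.  */
static int
example_cost_via_memory (int store_cost, int load_cost,
                         int src_nregs, int dst_nregs)
{
  int cost = store_cost + load_cost;
  if (src_nregs > dst_nregs)
    cost += 20;
  return cost;
}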
31242 /* Return TRUE if hard register REGNO can hold a value of machine-mode MODE. */
31246 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
31248 /* Flags, and only flags, can hold CCmode values. */
31249 if (CC_REGNO_P (regno))
31250 return GET_MODE_CLASS (mode) == MODE_CC;
31251 if (GET_MODE_CLASS (mode) == MODE_CC
31252 || GET_MODE_CLASS (mode) == MODE_RANDOM
31253 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
31255 if (FP_REGNO_P (regno))
31256 return VALID_FP_MODE_P (mode);
31257 if (SSE_REGNO_P (regno))
31259 /* We implement the move patterns for all vector modes into and
31260 out of SSE registers, even when no operation instructions
31261 are available. OImode move is available only when AVX is enabled. */
31263 return ((TARGET_AVX && mode == OImode)
31264 || VALID_AVX256_REG_MODE (mode)
31265 || VALID_SSE_REG_MODE (mode)
31266 || VALID_SSE2_REG_MODE (mode)
31267 || VALID_MMX_REG_MODE (mode)
31268 || VALID_MMX_REG_MODE_3DNOW (mode));
31270 if (MMX_REGNO_P (regno))
31272 /* We implement the move patterns for 3DNOW modes even in MMX mode,
31273 so if the register is available at all, then we can move data of
31274 the given mode into or out of it. */
31275 return (VALID_MMX_REG_MODE (mode)
31276 || VALID_MMX_REG_MODE_3DNOW (mode));
31279 if (mode == QImode)
31281 /* Take care with QImode values - they can be in non-QI regs,
31282 but then they do cause partial register stalls. */
31283 if (regno <= BX_REG || TARGET_64BIT)
31285 if (!TARGET_PARTIAL_REG_STALL)
31287 return !can_create_pseudo_p ();
31289 /* We handle both integers and floats in the general purpose registers. */
31290 else if (VALID_INT_MODE_P (mode))
31292 else if (VALID_FP_MODE_P (mode))
31294 else if (VALID_DFP_MODE_P (mode))
31296 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
31297 on to use that value in smaller contexts, this can easily force a
31298 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
31299 supporting DImode, allow it. */
31300 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
31306 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
31307 tieable integer mode. */
31310 ix86_tieable_integer_mode_p (enum machine_mode mode)
31319 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
31322 return TARGET_64BIT;
31329 /* Return true if MODE1 is accessible in a register that can hold MODE2
31330 without copying. That is, all register classes that can hold MODE2
31331 can also hold MODE1. */
31334 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
31336 if (mode1 == mode2)
31339 if (ix86_tieable_integer_mode_p (mode1)
31340 && ix86_tieable_integer_mode_p (mode2))
31343 /* MODE2 being XFmode implies fp stack or general regs, which means we
31344 can tie any smaller floating point modes to it. Note that we do not
31345 tie this with TFmode. */
31346 if (mode2 == XFmode)
31347 return mode1 == SFmode || mode1 == DFmode;
31349 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
31350 that we can tie it with SFmode. */
31351 if (mode2 == DFmode)
31352 return mode1 == SFmode;
31354 /* If MODE2 is only appropriate for an SSE register, then tie with
31355 any other mode acceptable to SSE registers. */
31356 if (GET_MODE_SIZE (mode2) == 16
31357 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
31358 return (GET_MODE_SIZE (mode1) == 16
31359 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
31361 /* If MODE2 is appropriate for an MMX register, then tie
31362 with any other mode acceptable to MMX registers. */
31363 if (GET_MODE_SIZE (mode2) == 8
31364 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
31365 return (GET_MODE_SIZE (mode1) == 8
31366 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
31371 /* Compute a (partial) cost for rtx X. Return true if the complete
31372 cost has been computed, and false if subexpressions should be
31373 scanned. In either case, *TOTAL contains the cost result. */
31376 ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
31379 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
31380 enum machine_mode mode = GET_MODE (x);
31381 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
31389 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
31391 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
31393 else if (flag_pic && SYMBOLIC_CONST (x)
31395 || (GET_CODE (x) != LABEL_REF
31396 && (GET_CODE (x) != SYMBOL_REF
31397 || !SYMBOL_REF_LOCAL_P (x)))))
31404 if (mode == VOIDmode)
31407 switch (standard_80387_constant_p (x))
31412 default: /* Other constants */
31417 /* Start with (MEM (SYMBOL_REF)), since that's where
31418 it'll probably end up. Add a penalty for size. */
31419 *total = (COSTS_N_INSNS (1)
31420 + (flag_pic != 0 && !TARGET_64BIT)
31421 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
31427 /* The zero extension is often completely free on x86_64, so make
31428 it as cheap as possible. */
31429 if (TARGET_64BIT && mode == DImode
31430 && GET_MODE (XEXP (x, 0)) == SImode)
31432 else if (TARGET_ZERO_EXTEND_WITH_AND)
31433 *total = cost->add;
31435 *total = cost->movzx;
31439 *total = cost->movsx;
31443 if (CONST_INT_P (XEXP (x, 1))
31444 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
31446 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
31449 *total = cost->add;
31452 if ((value == 2 || value == 3)
31453 && cost->lea <= cost->shift_const)
31455 *total = cost->lea;
31465 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
31467 if (CONST_INT_P (XEXP (x, 1)))
31469 if (INTVAL (XEXP (x, 1)) > 32)
31470 *total = cost->shift_const + COSTS_N_INSNS (2);
31472 *total = cost->shift_const * 2;
31476 if (GET_CODE (XEXP (x, 1)) == AND)
31477 *total = cost->shift_var * 2;
31479 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
31484 if (CONST_INT_P (XEXP (x, 1)))
31485 *total = cost->shift_const;
31487 *total = cost->shift_var;
31495 gcc_assert (FLOAT_MODE_P (mode));
31496 gcc_assert (TARGET_FMA || TARGET_FMA4);
31498 /* ??? SSE scalar/vector cost should be used here. */
31499 /* ??? Bald assumption that fma has the same cost as fmul. */
31500 *total = cost->fmul;
31501 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
31503 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
31505 if (GET_CODE (sub) == NEG)
31506 sub = XEXP (sub, 0);
31507 *total += rtx_cost (sub, FMA, 0, speed);
31510 if (GET_CODE (sub) == NEG)
31511 sub = XEXP (sub, 0);
31512 *total += rtx_cost (sub, FMA, 2, speed);
31517 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31519 /* ??? SSE scalar cost should be used here. */
31520 *total = cost->fmul;
31523 else if (X87_FLOAT_MODE_P (mode))
31525 *total = cost->fmul;
31528 else if (FLOAT_MODE_P (mode))
31530 /* ??? SSE vector cost should be used here. */
31531 *total = cost->fmul;
31536 rtx op0 = XEXP (x, 0);
31537 rtx op1 = XEXP (x, 1);
31539 if (CONST_INT_P (XEXP (x, 1)))
31541 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
31542 for (nbits = 0; value != 0; value &= value - 1)
31546 /* This is arbitrary. */
31549 /* Compute costs correctly for widening multiplication. */
31550 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
31551 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
31552 == GET_MODE_SIZE (mode))
31554 int is_mulwiden = 0;
31555 enum machine_mode inner_mode = GET_MODE (op0);
31557 if (GET_CODE (op0) == GET_CODE (op1))
31558 is_mulwiden = 1, op1 = XEXP (op1, 0);
31559 else if (CONST_INT_P (op1))
31561 if (GET_CODE (op0) == SIGN_EXTEND)
31562 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
31565 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
31569 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
31572 *total = (cost->mult_init[MODE_INDEX (mode)]
31573 + nbits * cost->mult_bit
31574 + rtx_cost (op0, outer_code, opno, speed)
31575 + rtx_cost (op1, outer_code, opno, speed));
31584 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31585 /* ??? SSE cost should be used here. */
31586 *total = cost->fdiv;
31587 else if (X87_FLOAT_MODE_P (mode))
31588 *total = cost->fdiv;
31589 else if (FLOAT_MODE_P (mode))
31590 /* ??? SSE vector cost should be used here. */
31591 *total = cost->fdiv;
31593 *total = cost->divide[MODE_INDEX (mode)];
31597 if (GET_MODE_CLASS (mode) == MODE_INT
31598 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
31600 if (GET_CODE (XEXP (x, 0)) == PLUS
31601 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
31602 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
31603 && CONSTANT_P (XEXP (x, 1)))
31605 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
31606 if (val == 2 || val == 4 || val == 8)
31608 *total = cost->lea;
31609 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
31610 outer_code, opno, speed);
31611 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
31612 outer_code, opno, speed);
31613 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31617 else if (GET_CODE (XEXP (x, 0)) == MULT
31618 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
31620 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
31621 if (val == 2 || val == 4 || val == 8)
31623 *total = cost->lea;
31624 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
31625 outer_code, opno, speed);
31626 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31630 else if (GET_CODE (XEXP (x, 0)) == PLUS)
31632 *total = cost->lea;
31633 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
31634 outer_code, opno, speed);
31635 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
31636 outer_code, opno, speed);
31637 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31644 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31646 /* ??? SSE cost should be used here. */
31647 *total = cost->fadd;
31650 else if (X87_FLOAT_MODE_P (mode))
31652 *total = cost->fadd;
31655 else if (FLOAT_MODE_P (mode))
31657 /* ??? SSE vector cost should be used here. */
31658 *total = cost->fadd;
31666 if (!TARGET_64BIT && mode == DImode)
31668 *total = (cost->add * 2
31669 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
31670 << (GET_MODE (XEXP (x, 0)) != DImode))
31671 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
31672 << (GET_MODE (XEXP (x, 1)) != DImode)));
31678 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31680 /* ??? SSE cost should be used here. */
31681 *total = cost->fchs;
31684 else if (X87_FLOAT_MODE_P (mode))
31686 *total = cost->fchs;
31689 else if (FLOAT_MODE_P (mode))
31691 /* ??? SSE vector cost should be used here. */
31692 *total = cost->fchs;
31698 if (!TARGET_64BIT && mode == DImode)
31699 *total = cost->add * 2;
31701 *total = cost->add;
31705 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
31706 && XEXP (XEXP (x, 0), 1) == const1_rtx
31707 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
31708 && XEXP (x, 1) == const0_rtx)
31710 /* This kind of construct is implemented using test[bwl].
31711 Treat it as if we had an AND. */
31712 *total = (cost->add
31713 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
31714 + rtx_cost (const1_rtx, outer_code, opno, speed));
31720 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
31725 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31726 /* ??? SSE cost should be used here. */
31727 *total = cost->fabs;
31728 else if (X87_FLOAT_MODE_P (mode))
31729 *total = cost->fabs;
31730 else if (FLOAT_MODE_P (mode))
31731 /* ??? SSE vector cost should be used here. */
31732 *total = cost->fabs;
31736 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31737 /* ??? SSE cost should be used here. */
31738 *total = cost->fsqrt;
31739 else if (X87_FLOAT_MODE_P (mode))
31740 *total = cost->fsqrt;
31741 else if (FLOAT_MODE_P (mode))
31742 /* ??? SSE vector cost should be used here. */
31743 *total = cost->fsqrt;
31747 if (XINT (x, 1) == UNSPEC_TP)
31754 case VEC_DUPLICATE:
31755 /* ??? Assume all of these vector manipulation patterns are
31756 recognizable. In which case they all pretty much have the same cost. */
31758 *total = COSTS_N_INSNS (1);
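/* The constant-multiplier loop in the MULT case above is Kernighan's
   population count: "value &= value - 1" clears the lowest set bit,
   so the trip count equals the number of one bits, a rough proxy for
   the shift-and-add cost of a constant multiply.  A standalone
   sketch:  */
static int
example_popcount (unsigned long long value)
{
  int nbits = 0;
  while (value != 0)
    {
      value &= value - 1;   /* Clear the lowest set bit.  */
      nbits++;
    }
  return nbits;
}
/* example_popcount (10) == 2, since 10 is binary 1010.  */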
31768 static int current_machopic_label_num;
31770 /* Given a symbol name and its associated stub, write out the
31771 definition of the stub. */
31774 machopic_output_stub (FILE *file, const char *symb, const char *stub)
31776 unsigned int length;
31777 char *binder_name, *symbol_name, lazy_ptr_name[32];
31778 int label = ++current_machopic_label_num;
31780 /* For 64-bit we shouldn't get here. */
31781 gcc_assert (!TARGET_64BIT);
31783 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
31784 symb = targetm.strip_name_encoding (symb);
31786 length = strlen (stub);
31787 binder_name = XALLOCAVEC (char, length + 32);
31788 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
31790 length = strlen (symb);
31791 symbol_name = XALLOCAVEC (char, length + 32);
31792 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
31794 sprintf (lazy_ptr_name, "L%d$lz", label);
31796 if (MACHOPIC_ATT_STUB)
31797 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
31798 else if (MACHOPIC_PURE)
31799 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
31801 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
31803 fprintf (file, "%s:\n", stub);
31804 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31806 if (MACHOPIC_ATT_STUB)
31808 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
31810 else if (MACHOPIC_PURE)
31813 /* 25-byte PIC stub using "CALL get_pc_thunk". */
31814 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
31815 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
31816 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
31817 label, lazy_ptr_name, label);
31818 fprintf (file, "\tjmp\t*%%ecx\n");
31821 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
31823 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
31824 it needs no stub-binding-helper. */
31825 if (MACHOPIC_ATT_STUB)
31828 fprintf (file, "%s:\n", binder_name);
31832 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
31833 fprintf (file, "\tpushl\t%%ecx\n");
31836 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
31838 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
31840 /* N.B. Keep the correspondence of these
31841 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
31842 old-pic/new-pic/non-pic stubs; altering this will break
31843 compatibility with existing dylibs. */
31846 /* 25-byte PIC stub using "CALL get_pc_thunk". */
31847 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
31850 /* 16-byte -mdynamic-no-pic stub. */
31851 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
31853 fprintf (file, "%s:\n", lazy_ptr_name);
31854 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31855 fprintf (file, ASM_LONG "%s\n", binder_name);
31857 #endif /* TARGET_MACHO */
31859 /* Order the registers for the register allocator. */
31862 x86_order_regs_for_local_alloc (void)
31867 /* First allocate the local general purpose registers. */
31868 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
31869 if (GENERAL_REGNO_P (i) && call_used_regs[i])
31870 reg_alloc_order [pos++] = i;
31872 /* Global general purpose registers. */
31873 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
31874 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
31875 reg_alloc_order [pos++] = i;
31877 /* x87 registers come first in case we are doing FP math using them. */
31879 if (!TARGET_SSE_MATH)
31880 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
31881 reg_alloc_order [pos++] = i;
31883 /* SSE registers. */
31884 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
31885 reg_alloc_order [pos++] = i;
31886 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
31887 reg_alloc_order [pos++] = i;
31889 /* x87 registers. */
31890 if (TARGET_SSE_MATH)
31891 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
31892 reg_alloc_order [pos++] = i;
31894 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
31895 reg_alloc_order [pos++] = i;
31897 /* Initialize the rest of the array, as we do not allocate some registers at all. */
31899 while (pos < FIRST_PSEUDO_REGISTER)
31900 reg_alloc_order [pos++] = 0;
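/* A miniature model of the ordering scheme above (hypothetical,
   fixed-size): registers are appended category by category, and the
   leftover slots are zero-filled so the array is always fully
   initialized.  */
#define EXAMPLE_NREGS 8
static void
example_order_regs (int order[EXAMPLE_NREGS],
                    int (*allocatable_p) (int),
                    int (*caller_saved_p) (int))
{
  int i, pos = 0;
  /* Caller-saved registers first: short-lived values placed in them
     need no save/restore code.  */
  for (i = 0; i < EXAMPLE_NREGS; i++)
    if (allocatable_p (i) && caller_saved_p (i))
      order[pos++] = i;
  /* Then the callee-saved registers.  */
  for (i = 0; i < EXAMPLE_NREGS; i++)
    if (allocatable_p (i) && !caller_saved_p (i))
      order[pos++] = i;
  /* Zero-fill slots for registers we never allocate, exactly as the
     final loop above does.  */
  while (pos < EXAMPLE_NREGS)
    order[pos++] = 0;
}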
31903 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
31904 in struct attribute_spec.handler. */
31906 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
31908 int flags ATTRIBUTE_UNUSED,
31909 bool *no_add_attrs)
31911 if (TREE_CODE (*node) != FUNCTION_TYPE
31912 && TREE_CODE (*node) != METHOD_TYPE
31913 && TREE_CODE (*node) != FIELD_DECL
31914 && TREE_CODE (*node) != TYPE_DECL)
31916 warning (OPT_Wattributes, "%qE attribute only applies to functions",
31918 *no_add_attrs = true;
31923 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
31925 *no_add_attrs = true;
31928 if (is_attribute_p ("callee_pop_aggregate_return", name))
31932 cst = TREE_VALUE (args);
31933 if (TREE_CODE (cst) != INTEGER_CST)
31935 warning (OPT_Wattributes,
31936 "%qE attribute requires an integer constant argument",
31938 *no_add_attrs = true;
31940 else if (compare_tree_int (cst, 0) != 0
31941 && compare_tree_int (cst, 1) != 0)
31943 warning (OPT_Wattributes,
31944 "argument to %qE attribute is neither zero, nor one",
31946 *no_add_attrs = true;
31955 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
31956 struct attribute_spec.handler. */
31958 ix86_handle_abi_attribute (tree *node, tree name,
31959 tree args ATTRIBUTE_UNUSED,
31960 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
31962 if (TREE_CODE (*node) != FUNCTION_TYPE
31963 && TREE_CODE (*node) != METHOD_TYPE
31964 && TREE_CODE (*node) != FIELD_DECL
31965 && TREE_CODE (*node) != TYPE_DECL)
31967 warning (OPT_Wattributes, "%qE attribute only applies to functions",
31969 *no_add_attrs = true;
31973 /* Can combine regparm with all attributes but fastcall. */
31974 if (is_attribute_p ("ms_abi", name))
31976 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
31978 error ("ms_abi and sysv_abi attributes are not compatible");
31983 else if (is_attribute_p ("sysv_abi", name))
31985 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
31987 error ("ms_abi and sysv_abi attributes are not compatible");
31996 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
31997 struct attribute_spec.handler. */
31999 ix86_handle_struct_attribute (tree *node, tree name,
32000 tree args ATTRIBUTE_UNUSED,
32001 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32004 if (DECL_P (*node))
32006 if (TREE_CODE (*node) == TYPE_DECL)
32007 type = &TREE_TYPE (*node);
32012 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
32014 warning (OPT_Wattributes, "%qE attribute ignored",
32016 *no_add_attrs = true;
32019 else if ((is_attribute_p ("ms_struct", name)
32020 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
32021 || ((is_attribute_p ("gcc_struct", name)
32022 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
32024 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
32026 *no_add_attrs = true;
32033 ix86_handle_fndecl_attribute (tree *node, tree name,
32034 tree args ATTRIBUTE_UNUSED,
32035 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32037 if (TREE_CODE (*node) != FUNCTION_DECL)
32039 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32041 *no_add_attrs = true;
32047 ix86_ms_bitfield_layout_p (const_tree record_type)
32049 return ((TARGET_MS_BITFIELD_LAYOUT
32050 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
32051 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
32054 /* Returns an expression indicating where the this parameter is
32055 located on entry to the FUNCTION. */
32058 x86_this_parameter (tree function)
32060 tree type = TREE_TYPE (function);
32061 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
32066 const int *parm_regs;
32068 if (ix86_function_type_abi (type) == MS_ABI)
32069 parm_regs = x86_64_ms_abi_int_parameter_registers;
32071 parm_regs = x86_64_int_parameter_registers;
32072 return gen_rtx_REG (DImode, parm_regs[aggr]);
32075 nregs = ix86_function_regparm (type, function);
32077 if (nregs > 0 && !stdarg_p (type))
32080 unsigned int ccvt = ix86_get_callcvt (type);
32082 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
32083 regno = aggr ? DX_REG : CX_REG;
32084 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
32088 return gen_rtx_MEM (SImode,
32089 plus_constant (stack_pointer_rtx, 4));
32098 return gen_rtx_MEM (SImode,
32099 plus_constant (stack_pointer_rtx, 4));
32102 return gen_rtx_REG (SImode, regno);
32105 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
32108 /* Determine whether x86_output_mi_thunk can succeed. */
32111 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
32112 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
32113 HOST_WIDE_INT vcall_offset, const_tree function)
32115 /* 64-bit can handle anything. */
32119 /* For 32-bit, everything's fine if we have one free register. */
32120 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
32123 /* Need a free register for vcall_offset. */
32127 /* Need a free register for GOT references. */
32128 if (flag_pic && !targetm.binds_local_p (function))
32131 /* Otherwise ok. */
32135 /* Output the assembler code for a thunk function. THUNK_DECL is the
32136 declaration for the thunk function itself, FUNCTION is the decl for
32137 the target function. DELTA is an immediate constant offset to be
32138 added to THIS. If VCALL_OFFSET is nonzero, the word at
32139 *(*this + vcall_offset) should be added to THIS. */
32142 x86_output_mi_thunk (FILE *file,
32143 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
32144 HOST_WIDE_INT vcall_offset, tree function)
32146 rtx this_param = x86_this_parameter (function);
32147 rtx this_reg, tmp, fnaddr;
32148 unsigned int tmp_regno;
32151 tmp_regno = R10_REG;
32154 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
32155 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
32156 tmp_regno = AX_REG;
32158 tmp_regno = CX_REG;
32161 emit_note (NOTE_INSN_PROLOGUE_END);
32163 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
32164 pull it in now and let DELTA benefit. */
32165 if (REG_P (this_param))
32166 this_reg = this_param;
32167 else if (vcall_offset)
32169 /* Put the this parameter into %eax. */
32170 this_reg = gen_rtx_REG (Pmode, AX_REG);
32171 emit_move_insn (this_reg, this_param);
32174 this_reg = NULL_RTX;
32176 /* Adjust the this parameter by a fixed constant. */
32179 rtx delta_rtx = GEN_INT (delta);
32180 rtx delta_dst = this_reg ? this_reg : this_param;
32184 if (!x86_64_general_operand (delta_rtx, Pmode))
32186 tmp = gen_rtx_REG (Pmode, tmp_regno);
32187 emit_move_insn (tmp, delta_rtx);
32192 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
32195 /* Adjust the this parameter by a value stored in the vtable. */
32198 rtx vcall_addr, vcall_mem, this_mem;
32200 tmp = gen_rtx_REG (Pmode, tmp_regno);
32202 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
32203 if (Pmode != ptr_mode)
32204 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
32205 emit_move_insn (tmp, this_mem);
32207 /* Adjust the this parameter. */
32208 vcall_addr = plus_constant (tmp, vcall_offset);
32210 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
32212 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
32213 emit_move_insn (tmp2, GEN_INT (vcall_offset));
32214 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
32217 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
32218 if (Pmode != ptr_mode)
32219 emit_insn (gen_addsi_1_zext (this_reg,
32220 gen_rtx_REG (ptr_mode,
32224 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
32227 /* If necessary, drop THIS back to its stack slot. */
32228 if (this_reg && this_reg != this_param)
32229 emit_move_insn (this_param, this_reg);
32231 fnaddr = XEXP (DECL_RTL (function), 0);
32234 if (!flag_pic || targetm.binds_local_p (function)
32235 || cfun->machine->call_abi == MS_ABI)
32239 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
32240 tmp = gen_rtx_CONST (Pmode, tmp);
32241 fnaddr = gen_rtx_MEM (Pmode, tmp);
32246 if (!flag_pic || targetm.binds_local_p (function))
32249 else if (TARGET_MACHO)
32251 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
32252 fnaddr = XEXP (fnaddr, 0);
32254 #endif /* TARGET_MACHO */
32257 tmp = gen_rtx_REG (Pmode, CX_REG);
32258 output_set_got (tmp, NULL_RTX);
32260 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
32261 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
32262 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
32266 /* Our sibling call patterns do not allow memories, because we have no
32267 predicate that can distinguish between frame and non-frame memory.
32268 For our purposes here, we can get away with (ab)using a jump pattern,
32269 because we're going to do no optimization. */
32270 if (MEM_P (fnaddr))
32271 emit_jump_insn (gen_indirect_jump (fnaddr));
32274 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
32275 fnaddr = legitimize_pic_address (fnaddr,
32276 gen_rtx_REG (Pmode, tmp_regno));
32278 if (!sibcall_insn_operand (fnaddr, Pmode))
32280 tmp = gen_rtx_REG (Pmode, tmp_regno);
32281 if (GET_MODE (fnaddr) != Pmode)
32282 fnaddr = gen_rtx_ZERO_EXTEND (Pmode, fnaddr);
32283 emit_move_insn (tmp, fnaddr);
32287 tmp = gen_rtx_MEM (QImode, fnaddr);
32288 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
32289 tmp = emit_call_insn (tmp);
32290 SIBLING_CALL_P (tmp) = 1;
32294 /* Emit just enough of rest_of_compilation to get the insns emitted.
32295 Note that use_thunk calls assemble_start_function et al. */
32296 tmp = get_insns ();
32297 insn_locators_alloc ();
32298 shorten_branches (tmp);
32299 final_start_function (tmp, file, 1);
32300 final (tmp, file, 1);
32301 final_end_function ();
32305 x86_file_start (void)
32307 default_file_start ();
32309 darwin_file_start ();
32311 if (X86_FILE_START_VERSION_DIRECTIVE)
32312 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
32313 if (X86_FILE_START_FLTUSED)
32314 fputs ("\t.global\t__fltused\n", asm_out_file);
32315 if (ix86_asm_dialect == ASM_INTEL)
32316 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
32320 x86_field_alignment (tree field, int computed)
32322 enum machine_mode mode;
32323 tree type = TREE_TYPE (field);
32325 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
32327 mode = TYPE_MODE (strip_array_types (type));
32328 if (mode == DFmode || mode == DCmode
32329 || GET_MODE_CLASS (mode) == MODE_INT
32330 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
32331 return MIN (32, computed);
32335 /* Output assembler code to FILE to increment profiler label # LABELNO
32336 for profiling a function entry. */
32338 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
32340 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
32345 #ifndef NO_PROFILE_COUNTERS
32346 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
32349 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
32350 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
32352 fprintf (file, "\tcall\t%s\n", mcount_name);
32356 #ifndef NO_PROFILE_COUNTERS
32357 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
32360 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
32364 #ifndef NO_PROFILE_COUNTERS
32365 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
32368 fprintf (file, "\tcall\t%s\n", mcount_name);
32372 /* We don't have exact information about the insn sizes, but we may assume
32373 quite safely that we are informed about all 1 byte insns and memory
32374 address sizes. This is enough to eliminate unnecessary padding in the most common cases. */
32378 min_insn_size (rtx insn)
32382 if (!INSN_P (insn) || !active_insn_p (insn))
32385 /* Discard alignments we've emitted, and jump instructions. */
32386 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
32387 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
32389 if (JUMP_TABLE_DATA_P (insn))
32392 /* Important case - calls are always 5 bytes.
32393 It is common to have many calls in a row. */
32395 && symbolic_reference_mentioned_p (PATTERN (insn))
32396 && !SIBLING_CALL_P (insn))
32398 len = get_attr_length (insn);
32402 /* For normal instructions we rely on get_attr_length being exact,
32403 with a few exceptions. */
32404 if (!JUMP_P (insn))
32406 enum attr_type type = get_attr_type (insn);
32411 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
32412 || asm_noperands (PATTERN (insn)) >= 0)
32419 /* Otherwise trust get_attr_length. */
32423 l = get_attr_length_address (insn);
32424 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
32433 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
32435 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
32439 ix86_avoid_jump_mispredicts (void)
32441 rtx insn, start = get_insns ();
32442 int nbytes = 0, njumps = 0;
32445 /* Look for all minimal intervals of instructions containing 4 jumps.
32446 The intervals are bounded by START and INSN. NBYTES is the total
32447 size of instructions in the interval including INSN and not including
32448 START. When NBYTES is smaller than 16 bytes, it is possible
32449 that the end of START and INSN both land in the same 16-byte page.
32451 The smallest offset in the page at which INSN can start is the case where
32452 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
32453 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
32455 for (insn = start; insn; insn = NEXT_INSN (insn))
32459 if (LABEL_P (insn))
32461 int align = label_to_alignment (insn);
32462 int max_skip = label_to_max_skip (insn);
32466 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
32467 already in the current 16 byte page, because otherwise
32468 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
32469 bytes to reach 16 byte boundary. */
32471 || (align <= 3 && max_skip != (1 << align) - 1))
32474 fprintf (dump_file, "Label %i with max_skip %i\n",
32475 INSN_UID (insn), max_skip);
32478 while (nbytes + max_skip >= 16)
32480 start = NEXT_INSN (start);
32481 if ((JUMP_P (start)
32482 && GET_CODE (PATTERN (start)) != ADDR_VEC
32483 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
32485 njumps--, isjump = 1;
32488 nbytes -= min_insn_size (start);
32494 min_size = min_insn_size (insn);
32495 nbytes += min_size;
32497 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
32498 INSN_UID (insn), min_size);
32500 && GET_CODE (PATTERN (insn)) != ADDR_VEC
32501 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
32509 start = NEXT_INSN (start);
32510 if ((JUMP_P (start)
32511 && GET_CODE (PATTERN (start)) != ADDR_VEC
32512 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
32514 njumps--, isjump = 1;
32517 nbytes -= min_insn_size (start);
32519 gcc_assert (njumps >= 0);
32521 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
32522 INSN_UID (start), INSN_UID (insn), nbytes);
32524 if (njumps == 3 && isjump && nbytes < 16)
32526 int padsize = 15 - nbytes + min_insn_size (insn);
32529 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
32530 INSN_UID (insn), padsize);
32531 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
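/* A self-contained model of the sliding window above (hypothetical
   arrays and sizes): SIZE[i] is the estimated byte length of insn i,
   JUMP[i] is nonzero for jumps, and PAD[i] receives the padding to
   emit before insn i so that no minimal four-jump interval fits in a
   single 16-byte window.  */
static void
example_find_pad_points (const int size[], const int jump[],
                         int pad[], int n)
{
  int start = 0, i, nbytes = 0, njumps = 0;
  for (i = 0; i < n; i++)
    {
      int dropped_jump = 0;
      nbytes += size[i];
      if (jump[i])
        njumps++;
      /* Shrink the window from the left until it holds at most three
         jumps, remembering whether a jump fell off; if one did, the
         window ending at insn i previously held four.  */
      while (njumps > 3)
        {
          if (jump[start])
            {
              njumps--;
              dropped_jump = 1;
            }
          nbytes -= size[start];
          start++;
        }
      /* Mirror "15 - NBYTES + sizeof (INSN)" from the code above.  */
      pad[i] = (dropped_jump && nbytes < 16)
               ? 15 - nbytes + size[i] : 0;
    }
}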
32537 /* AMD Athlon works faster
32538 when RET is not the destination of a conditional jump or directly preceded
32539 by another jump instruction. We avoid the penalty by inserting a NOP just
32540 before the RET instructions in such cases. */
32542 ix86_pad_returns (void)
32547 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
32549 basic_block bb = e->src;
32550 rtx ret = BB_END (bb);
32552 bool replace = false;
32554 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
32555 || optimize_bb_for_size_p (bb))
32557 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
32558 if (active_insn_p (prev) || LABEL_P (prev))
32560 if (prev && LABEL_P (prev))
32565 FOR_EACH_EDGE (e, ei, bb->preds)
32566 if (EDGE_FREQUENCY (e) && e->src->index >= 0
32567 && !(e->flags & EDGE_FALLTHRU))
32572 prev = prev_active_insn (ret);
32574 && ((JUMP_P (prev) && any_condjump_p (prev))
32577 /* Empty functions get a branch mispredict even when
32578 the jump destination is not visible to us. */
32579 if (!prev && !optimize_function_for_size_p (cfun))
32584 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
32590 /* Count the minimum number of instructions in BB. Return 4 if the
32591 number of instructions >= 4. */
32594 ix86_count_insn_bb (basic_block bb)
32597 int insn_count = 0;
32599 /* Count number of instructions in this block. Return 4 if the number
32600 of instructions >= 4. */
32601 FOR_BB_INSNS (bb, insn)
32603 /* This only happens in exit blocks. */
32605 && ANY_RETURN_P (PATTERN (insn)))
32608 if (NONDEBUG_INSN_P (insn)
32609 && GET_CODE (PATTERN (insn)) != USE
32610 && GET_CODE (PATTERN (insn)) != CLOBBER)
32613 if (insn_count >= 4)
32622 /* Count the minimum number of instructions in the code path ending in BB.
32623 Return 4 if the number of instructions >= 4. */
32626 ix86_count_insn (basic_block bb)
32630 int min_prev_count;
32632 /* Only bother counting instructions along paths with no
32633 more than 2 basic blocks between entry and exit. Given
32634 that BB has an edge to exit, determine if a predecessor
32635 of BB has an edge from entry. If so, compute the number
32636 of instructions in the predecessor block. If there
32637 happen to be multiple such blocks, compute the minimum. */
32638 min_prev_count = 4;
32639 FOR_EACH_EDGE (e, ei, bb->preds)
32642 edge_iterator prev_ei;
32644 if (e->src == ENTRY_BLOCK_PTR)
32646 min_prev_count = 0;
32649 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
32651 if (prev_e->src == ENTRY_BLOCK_PTR)
32653 int count = ix86_count_insn_bb (e->src);
32654 if (count < min_prev_count)
32655 min_prev_count = count;
32661 if (min_prev_count < 4)
32662 min_prev_count += ix86_count_insn_bb (bb);
32664 return min_prev_count;
32667 /* Pad short function to 4 instructions. */
32670 ix86_pad_short_function (void)
32675 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
32677 rtx ret = BB_END (e->src);
32678 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
32680 int insn_count = ix86_count_insn (e->src);
32682 /* Pad short function. */
32683 if (insn_count < 4)
32687 /* Find epilogue. */
32690 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
32691 insn = PREV_INSN (insn);
32696 /* Two NOPs count as one instruction. */
32697 insn_count = 2 * (4 - insn_count);
32698 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
32704 /* Implement machine specific optimizations. We implement padding of returns
32705 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
32709 /* We are freeing block_for_insn in the toplev to keep compatibility
32710 with old MDEP_REORGS that are not CFG based. Recompute it now. */
32711 compute_bb_for_insn ();
32713 /* Run the vzeroupper optimization if needed. */
32714 if (TARGET_VZEROUPPER)
32715 move_or_delete_vzeroupper ();
32717 if (optimize && optimize_function_for_speed_p (cfun))
32719 if (TARGET_PAD_SHORT_FUNCTION)
32720 ix86_pad_short_function ();
32721 else if (TARGET_PAD_RETURNS)
32722 ix86_pad_returns ();
32723 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
32724 if (TARGET_FOUR_JUMP_LIMIT)
32725 ix86_avoid_jump_mispredicts ();
32730 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
32733 x86_extended_QIreg_mentioned_p (rtx insn)
32736 extract_insn_cached (insn);
32737 for (i = 0; i < recog_data.n_operands; i++)
32738 if (REG_P (recog_data.operand[i])
32739 && REGNO (recog_data.operand[i]) > BX_REG)
32744 /* Return nonzero when P points to a register encoded via a REX prefix.
32745 Called via for_each_rtx. */
32747 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
32749 unsigned int regno;
32752 regno = REGNO (*p);
32753 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
32756 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
32759 x86_extended_reg_mentioned_p (rtx insn)
32761 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
32762 extended_reg_mentioned_1, NULL);
32765 /* If profitable, negate (without causing overflow) integer constant
32766 of mode MODE at location LOC. Return true in this case. */
32768 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
32772 if (!CONST_INT_P (*loc))
32778 /* DImode x86_64 constants must fit in 32 bits. */
32779 gcc_assert (x86_64_immediate_operand (*loc, mode));
32790 gcc_unreachable ();
32793 /* Avoid overflows. */
32794 if (mode_signbit_p (mode, *loc))
32797 val = INTVAL (*loc);
32799 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
32800 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
32801 if ((val < 0 && val != -128)
32804 *loc = GEN_INT (-val);
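/* A sketch of the full heuristic (the "|| val == 128" half is an
   assumption about the elided branch, implied by the encoding note
   above): negative immediates are flipped so the output reads
   "subl $4, %eax" rather than "addl $-4, %eax", except -128, which
   already fits the sign-extended 8-bit immediate field; +128 does
   not fit, so flipping it to -128 saves encoding space.  */
static int
example_should_negate (long val)
{
  return (val < 0 && val != -128) || val == 128;
}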
32811 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
32812 optabs would emit if we didn't have TFmode patterns. */
32815 x86_emit_floatuns (rtx operands[2])
32817 rtx neglab, donelab, i0, i1, f0, in, out;
32818 enum machine_mode mode, inmode;
32820 inmode = GET_MODE (operands[1]);
32821 gcc_assert (inmode == SImode || inmode == DImode);
32824 in = force_reg (inmode, operands[1]);
32825 mode = GET_MODE (out);
32826 neglab = gen_label_rtx ();
32827 donelab = gen_label_rtx ();
32828 f0 = gen_reg_rtx (mode);
32830 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
32832 expand_float (out, in, 0);
32834 emit_jump_insn (gen_jump (donelab));
32837 emit_label (neglab);
32839 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
32841 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
32843 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
32845 expand_float (f0, i0, 0);
32847 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
32849 emit_label (donelab);
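/* The scalar analogue of the expansion above, as an illustrative
   sketch: when the input has its top bit set, halve it while folding
   the discarded bit back in (so rounding still sees it), convert via
   the signed path, then double the result.  */
static double
example_floatuns (unsigned long long x)
{
  if ((long long) x >= 0)
    return (double) (long long) x;   /* Signed conversion suffices.  */
  else
    {
      /* (x >> 1) | (x & 1): shift right one, keeping the low bit
         sticky, exactly like the LSHIFTRT/AND/IOR sequence above.  */
      unsigned long long half = (x >> 1) | (x & 1);
      double d = (double) (long long) half;
      return d + d;
    }
}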
32852 /* AVX2 does support 32-byte integer vector operations,
32853 thus the longest vector we are faced with is V32QImode. */
32854 #define MAX_VECT_LEN 32
32856 struct expand_vec_perm_d
32858 rtx target, op0, op1;
32859 unsigned char perm[MAX_VECT_LEN];
32860 enum machine_mode vmode;
32861 unsigned char nelt;
32865 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
32866 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
32868 /* Get a vector mode of the same size as the original but with elements
32869 twice as wide. This is only guaranteed to apply to integral vectors. */
32871 static inline enum machine_mode
32872 get_mode_wider_vector (enum machine_mode o)
32874 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
32875 enum machine_mode n = GET_MODE_WIDER_MODE (o);
32876 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
32877 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
32881 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
32882 with all elements equal to VAR. Return true if successful. */
32885 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
32886 rtx target, rtx val)
32909 /* First attempt to recognize VAL as-is. */
32910 dup = gen_rtx_VEC_DUPLICATE (mode, val);
32911 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
32912 if (recog_memoized (insn) < 0)
32915 /* If that fails, force VAL into a register. */
32918 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
32919 seq = get_insns ();
32922 emit_insn_before (seq, insn);
32924 ok = recog_memoized (insn) >= 0;
32933 if (TARGET_SSE || TARGET_3DNOW_A)
32937 val = gen_lowpart (SImode, val);
32938 x = gen_rtx_TRUNCATE (HImode, val);
32939 x = gen_rtx_VEC_DUPLICATE (mode, x);
32940 emit_insn (gen_rtx_SET (VOIDmode, target, x));
32953 struct expand_vec_perm_d dperm;
32957 memset (&dperm, 0, sizeof (dperm));
32958 dperm.target = target;
32959 dperm.vmode = mode;
32960 dperm.nelt = GET_MODE_NUNITS (mode);
32961 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
32963 /* Extend to SImode using a paradoxical SUBREG. */
32964 tmp1 = gen_reg_rtx (SImode);
32965 emit_move_insn (tmp1, gen_lowpart (SImode, val));
32967 /* Insert the SImode value as low element of a V4SImode vector. */
32968 tmp2 = gen_lowpart (V4SImode, dperm.op0);
32969 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
32971 ok = (expand_vec_perm_1 (&dperm)
32972 || expand_vec_perm_broadcast_1 (&dperm));
32984 /* Replicate the value once into the next wider mode and recurse. */
32986 enum machine_mode smode, wsmode, wvmode;
32989 smode = GET_MODE_INNER (mode);
32990 wvmode = get_mode_wider_vector (mode);
32991 wsmode = GET_MODE_INNER (wvmode);
32993 val = convert_modes (wsmode, smode, val, true);
32994 x = expand_simple_binop (wsmode, ASHIFT, val,
32995 GEN_INT (GET_MODE_BITSIZE (smode)),
32996 NULL_RTX, 1, OPTAB_LIB_WIDEN);
32997 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
32999 x = gen_lowpart (wvmode, target);
33000 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
33008 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
33009 rtx x = gen_reg_rtx (hvmode);
33011 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
33014 x = gen_rtx_VEC_CONCAT (mode, x, x);
33015 emit_insn (gen_rtx_SET (VOIDmode, target, x));
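/* Illustrative sketch (an addition, not GCC's own code): the wider-mode
   replication step above, shown for a QImode broadcast.  Pairing two
   copies of the byte into one HImode scalar and broadcasting that in the
   vector mode with half as many, twice-as-wide elements produces the
   same bytes as broadcasting the byte itself.  Hypothetical helper.  */

static unsigned short ATTRIBUTE_UNUSED
illustrative_pair_for_broadcast (unsigned char b)
{
  /* val | (val << GET_MODE_BITSIZE (QImode)), as the ASHIFT/IOR above.  */
  return (unsigned short) (b | (b << 8));
}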
33024 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33025 whose ONE_VAR element is VAR, and other elements are zero. Return true if successful. */
33029 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
33030 rtx target, rtx var, int one_var)
33032 enum machine_mode vsimode;
33035 bool use_vector_set = false;
33040 /* For SSE4.1, we normally use vector set. But if the second
33041 element is zero and inter-unit moves are OK, we use movq instead. */
33043 use_vector_set = (TARGET_64BIT
33045 && !(TARGET_INTER_UNIT_MOVES
33051 use_vector_set = TARGET_SSE4_1;
33054 use_vector_set = TARGET_SSE2;
33057 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
33064 use_vector_set = TARGET_AVX;
33067 /* Use ix86_expand_vector_set in 64bit mode only. */
33068 use_vector_set = TARGET_AVX && TARGET_64BIT;
33074 if (use_vector_set)
33076 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
33077 var = force_reg (GET_MODE_INNER (mode), var);
33078 ix86_expand_vector_set (mmx_ok, target, var, one_var);
33094 var = force_reg (GET_MODE_INNER (mode), var);
33095 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
33096 emit_insn (gen_rtx_SET (VOIDmode, target, x));
33101 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
33102 new_target = gen_reg_rtx (mode);
33104 new_target = target;
33105 var = force_reg (GET_MODE_INNER (mode), var);
33106 x = gen_rtx_VEC_DUPLICATE (mode, var);
33107 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
33108 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
33111 /* We need to shuffle the value to the correct position, so
33112 create a new pseudo to store the intermediate result. */
33114 /* With SSE2, we can use the integer shuffle insns. */
33115 if (mode != V4SFmode && TARGET_SSE2)
33117 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
33119 GEN_INT (one_var == 1 ? 0 : 1),
33120 GEN_INT (one_var == 2 ? 0 : 1),
33121 GEN_INT (one_var == 3 ? 0 : 1)));
33122 if (target != new_target)
33123 emit_move_insn (target, new_target);
33127 /* Otherwise convert the intermediate result to V4SFmode and
33128 use the SSE1 shuffle instructions. */
33129 if (mode != V4SFmode)
33131 tmp = gen_reg_rtx (V4SFmode);
33132 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
33137 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
33139 GEN_INT (one_var == 1 ? 0 : 1),
33140 GEN_INT (one_var == 2 ? 0+4 : 1+4),
33141 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
33143 if (mode != V4SFmode)
33144 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
33145 else if (tmp != target)
33146 emit_move_insn (target, tmp);
33148 else if (target != new_target)
33149 emit_move_insn (target, new_target);
33154 vsimode = V4SImode;
33160 vsimode = V2SImode;
33166 /* Zero extend the variable element to SImode and recurse. */
33167 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
33169 x = gen_reg_rtx (vsimode);
33170 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
33172 gcc_unreachable ();
33174 emit_move_insn (target, gen_lowpart (mode, x));
33182 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33183 consisting of the values in VALS. It is known that all elements
33184 except ONE_VAR are constants. Return true if successful. */
33187 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
33188 rtx target, rtx vals, int one_var)
33190 rtx var = XVECEXP (vals, 0, one_var);
33191 enum machine_mode wmode;
33194 const_vec = copy_rtx (vals);
33195 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
33196 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
33204 /* For the two element vectors, it's just as easy to use
33205 the general case. */
33209 /* Use ix86_expand_vector_set in 64bit mode only. */
33232 /* There's no way to set one QImode entry easily. Combine
33233 the variable value with its adjacent constant value, and
33234 promote to an HImode set. */
33235 x = XVECEXP (vals, 0, one_var ^ 1);
33238 var = convert_modes (HImode, QImode, var, true);
33239 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
33240 NULL_RTX, 1, OPTAB_LIB_WIDEN);
33241 x = GEN_INT (INTVAL (x) & 0xff);
33245 var = convert_modes (HImode, QImode, var, true);
33246 x = gen_int_mode (INTVAL (x) << 8, HImode);
33248 if (x != const0_rtx)
33249 var = expand_simple_binop (HImode, IOR, var, x, var,
33250 1, OPTAB_LIB_WIDEN);
33252 x = gen_reg_rtx (wmode);
33253 emit_move_insn (x, gen_lowpart (wmode, const_vec));
33254 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
33256 emit_move_insn (target, gen_lowpart (mode, x));
33263 emit_move_insn (target, const_vec);
33264 ix86_expand_vector_set (mmx_ok, target, var, one_var);
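/* Illustrative sketch (an addition, not GCC's own code): the QImode
   combine step above.  On little-endian x86 the even-indexed byte of an
   HImode pair is the low half, so the variable byte is shifted into
   place and IORed with its constant neighbor.  Hypothetical helper.  */

static unsigned short ATTRIBUTE_UNUSED
illustrative_combine_qi_pair (unsigned char var, unsigned char cst,
			      int one_var)
{
  if (one_var & 1)
    /* Variable byte is the high half of the pair.  */
    return (unsigned short) ((var << 8) | cst);
  else
    /* Variable byte is the low half; the constant is shifted up.  */
    return (unsigned short) ((cst << 8) | var);
}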
33268 /* A subroutine of ix86_expand_vector_init_general. Use vector
33269 concatenate to handle the most general case: all values variable,
33270 and none identical. */
33273 ix86_expand_vector_init_concat (enum machine_mode mode,
33274 rtx target, rtx *ops, int n)
33276 enum machine_mode cmode, hmode = VOIDmode;
33277 rtx first[8], second[4];
33317 gcc_unreachable ();
33320 if (!register_operand (ops[1], cmode))
33321 ops[1] = force_reg (cmode, ops[1]);
33322 if (!register_operand (ops[0], cmode))
33323 ops[0] = force_reg (cmode, ops[0]);
33324 emit_insn (gen_rtx_SET (VOIDmode, target,
33325 gen_rtx_VEC_CONCAT (mode, ops[0],
33345 gcc_unreachable ();
33361 gcc_unreachable ();
33366 /* FIXME: We process inputs backward to help RA. PR 36222. */
33369 for (; i > 0; i -= 2, j--)
33371 first[j] = gen_reg_rtx (cmode);
33372 v = gen_rtvec (2, ops[i - 1], ops[i]);
33373 ix86_expand_vector_init (false, first[j],
33374 gen_rtx_PARALLEL (cmode, v));
33380 gcc_assert (hmode != VOIDmode);
33381 for (i = j = 0; i < n; i += 2, j++)
33383 second[j] = gen_reg_rtx (hmode);
33384 ix86_expand_vector_init_concat (hmode, second [j],
33388 ix86_expand_vector_init_concat (mode, target, second, n);
33391 ix86_expand_vector_init_concat (mode, target, first, n);
33395 gcc_unreachable ();
33399 /* A subroutine of ix86_expand_vector_init_general. Use vector
33400 interleave to handle the most general case: all values variable,
33401 and none identical. */
33404 ix86_expand_vector_init_interleave (enum machine_mode mode,
33405 rtx target, rtx *ops, int n)
33407 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
33410 rtx (*gen_load_even) (rtx, rtx, rtx);
33411 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
33412 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
33417 gen_load_even = gen_vec_setv8hi;
33418 gen_interleave_first_low = gen_vec_interleave_lowv4si;
33419 gen_interleave_second_low = gen_vec_interleave_lowv2di;
33420 inner_mode = HImode;
33421 first_imode = V4SImode;
33422 second_imode = V2DImode;
33423 third_imode = VOIDmode;
33426 gen_load_even = gen_vec_setv16qi;
33427 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
33428 gen_interleave_second_low = gen_vec_interleave_lowv4si;
33429 inner_mode = QImode;
33430 first_imode = V8HImode;
33431 second_imode = V4SImode;
33432 third_imode = V2DImode;
33435 gcc_unreachable ();
33438 for (i = 0; i < n; i++)
33440 /* Extend the odd element to SImode using a paradoxical SUBREG. */
33441 op0 = gen_reg_rtx (SImode);
33442 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
33444 /* Insert the SImode value as low element of V4SImode vector. */
33445 op1 = gen_reg_rtx (V4SImode);
33446 op0 = gen_rtx_VEC_MERGE (V4SImode,
33447 gen_rtx_VEC_DUPLICATE (V4SImode,
33449 CONST0_RTX (V4SImode),
33451 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
33453 /* Cast the V4SImode vector back to a vector in the original mode. */
33454 op0 = gen_reg_rtx (mode);
33455 emit_move_insn (op0, gen_lowpart (mode, op1));
33457 /* Load even elements into the second position. */
33458 emit_insn (gen_load_even (op0,
33459 force_reg (inner_mode,
33463 /* Cast vector to FIRST_IMODE vector. */
33464 ops[i] = gen_reg_rtx (first_imode);
33465 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
33468 /* Interleave low FIRST_IMODE vectors. */
33469 for (i = j = 0; i < n; i += 2, j++)
33471 op0 = gen_reg_rtx (first_imode);
33472 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
33474 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
33475 ops[j] = gen_reg_rtx (second_imode);
33476 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
33479 /* Interleave low SECOND_IMODE vectors. */
33480 switch (second_imode)
33483 for (i = j = 0; i < n / 2; i += 2, j++)
33485 op0 = gen_reg_rtx (second_imode);
33486 emit_insn (gen_interleave_second_low (op0, ops[i],
33489 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
33491 ops[j] = gen_reg_rtx (third_imode);
33492 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
33494 second_imode = V2DImode;
33495 gen_interleave_second_low = gen_vec_interleave_lowv2di;
33499 op0 = gen_reg_rtx (second_imode);
33500 emit_insn (gen_interleave_second_low (op0, ops[0],
33503 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
33505 emit_insn (gen_rtx_SET (VOIDmode, target,
33506 gen_lowpart (mode, op0)));
33510 gcc_unreachable ();
33514 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
33515 all values variable, and none identical. */
33518 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
33519 rtx target, rtx vals)
33521 rtx ops[32], op0, op1;
33522 enum machine_mode half_mode = VOIDmode;
33529 if (!mmx_ok && !TARGET_SSE)
33541 n = GET_MODE_NUNITS (mode);
33542 for (i = 0; i < n; i++)
33543 ops[i] = XVECEXP (vals, 0, i);
33544 ix86_expand_vector_init_concat (mode, target, ops, n);
33548 half_mode = V16QImode;
33552 half_mode = V8HImode;
33556 n = GET_MODE_NUNITS (mode);
33557 for (i = 0; i < n; i++)
33558 ops[i] = XVECEXP (vals, 0, i);
33559 op0 = gen_reg_rtx (half_mode);
33560 op1 = gen_reg_rtx (half_mode);
33561 ix86_expand_vector_init_interleave (half_mode, op0, ops,
33563 ix86_expand_vector_init_interleave (half_mode, op1,
33564 &ops [n >> 1], n >> 2);
33565 emit_insn (gen_rtx_SET (VOIDmode, target,
33566 gen_rtx_VEC_CONCAT (mode, op0, op1)));
33570 if (!TARGET_SSE4_1)
33578 /* Don't use ix86_expand_vector_init_interleave if we can't
33579 move from GPR to SSE register directly. */
33580 if (!TARGET_INTER_UNIT_MOVES)
33583 n = GET_MODE_NUNITS (mode);
33584 for (i = 0; i < n; i++)
33585 ops[i] = XVECEXP (vals, 0, i);
33586 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
33594 gcc_unreachable ();
33598 int i, j, n_elts, n_words, n_elt_per_word;
33599 enum machine_mode inner_mode;
33600 rtx words[4], shift;
33602 inner_mode = GET_MODE_INNER (mode);
33603 n_elts = GET_MODE_NUNITS (mode);
33604 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
33605 n_elt_per_word = n_elts / n_words;
33606 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
33608 for (i = 0; i < n_words; ++i)
33610 rtx word = NULL_RTX;
33612 for (j = 0; j < n_elt_per_word; ++j)
33614 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
33615 elt = convert_modes (word_mode, inner_mode, elt, true);
33621 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
33622 word, 1, OPTAB_LIB_WIDEN);
33623 word = expand_simple_binop (word_mode, IOR, word, elt,
33624 word, 1, OPTAB_LIB_WIDEN);
33632 emit_move_insn (target, gen_lowpart (mode, words[0]));
33633 else if (n_words == 2)
33635 rtx tmp = gen_reg_rtx (mode);
33636 emit_clobber (tmp);
33637 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
33638 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
33639 emit_move_insn (target, tmp);
33641 else if (n_words == 4)
33643 rtx tmp = gen_reg_rtx (V4SImode);
33644 gcc_assert (word_mode == SImode);
33645 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
33646 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
33647 emit_move_insn (target, gen_lowpart (mode, tmp));
33650 gcc_unreachable ();
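/* Illustrative sketch (an addition, not GCC's own code): how the word
   loop above packs vector elements, shown for four HImode elements in
   one 64-bit word.  Elements are folded in from the most significant
   position down, so element 0 ends up in the low bits, matching
   little-endian layout.  Hypothetical helper.  */

static unsigned long long ATTRIBUTE_UNUSED
illustrative_pack_word (const unsigned short elts[4])
{
  unsigned long long word = 0;
  int j;
  for (j = 0; j < 4; j++)
    /* word = (word << 16) | elts[4 - j - 1], mirroring the ASHIFT/IOR.  */
    word = (word << 16) | elts[4 - j - 1];
  return word;
}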
33654 /* Initialize vector TARGET via VALS. Suppress the use of MMX
33655 instructions unless MMX_OK is true. */
33658 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
33660 enum machine_mode mode = GET_MODE (target);
33661 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33662 int n_elts = GET_MODE_NUNITS (mode);
33663 int n_var = 0, one_var = -1;
33664 bool all_same = true, all_const_zero = true;
33668 for (i = 0; i < n_elts; ++i)
33670 x = XVECEXP (vals, 0, i);
33671 if (!(CONST_INT_P (x)
33672 || GET_CODE (x) == CONST_DOUBLE
33673 || GET_CODE (x) == CONST_FIXED))
33674 n_var++, one_var = i;
33675 else if (x != CONST0_RTX (inner_mode))
33676 all_const_zero = false;
33677 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
33681 /* Constants are best loaded from the constant pool. */
33684 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
33688 /* If all values are identical, broadcast the value. */
33690 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
33691 XVECEXP (vals, 0, 0)))
33694 /* Values where only one field is non-constant are best loaded from
33695 the pool and overwritten via move later. */
33699 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
33700 XVECEXP (vals, 0, one_var),
33704 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
33708 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
33712 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
33714 enum machine_mode mode = GET_MODE (target);
33715 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33716 enum machine_mode half_mode;
33717 bool use_vec_merge = false;
33719 static rtx (*gen_extract[6][2]) (rtx, rtx)
33721 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
33722 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
33723 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
33724 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
33725 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
33726 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
33728 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
33730 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
33731 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
33732 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
33733 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
33734 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
33735 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
33745 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
33746 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
33748 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
33750 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
33751 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33757 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
33761 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
33762 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
33764 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
33766 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
33767 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33774 /* For the two element vectors, we implement a VEC_CONCAT with
33775 the extraction of the other element. */
33777 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
33778 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
33781 op0 = val, op1 = tmp;
33783 op0 = tmp, op1 = val;
33785 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
33786 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33791 use_vec_merge = TARGET_SSE4_1;
33798 use_vec_merge = true;
33802 /* tmp = target = A B C D */
33803 tmp = copy_to_reg (target);
33804 /* target = A A B B */
33805 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
33806 /* target = X A B B */
33807 ix86_expand_vector_set (false, target, val, 0);
33808 /* target = A X C D */
33809 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33810 const1_rtx, const0_rtx,
33811 GEN_INT (2+4), GEN_INT (3+4)));
33815 /* tmp = target = A B C D */
33816 tmp = copy_to_reg (target);
33817 /* tmp = X B C D */
33818 ix86_expand_vector_set (false, tmp, val, 0);
33819 /* target = A B X D */
33820 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33821 const0_rtx, const1_rtx,
33822 GEN_INT (0+4), GEN_INT (3+4)));
33826 /* tmp = target = A B C D */
33827 tmp = copy_to_reg (target);
33828 /* tmp = X B C D */
33829 ix86_expand_vector_set (false, tmp, val, 0);
33830 /* target = A B X D */
33831 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33832 const0_rtx, const1_rtx,
33833 GEN_INT (2+4), GEN_INT (0+4)));
33837 gcc_unreachable ();
33842 use_vec_merge = TARGET_SSE4_1;
33846 /* Element 0 handled by vec_merge below. */
33849 use_vec_merge = true;
33855 /* With SSE2, use integer shuffles to swap element 0 and ELT,
33856 store into element 0, then shuffle them back. */
33860 order[0] = GEN_INT (elt);
33861 order[1] = const1_rtx;
33862 order[2] = const2_rtx;
33863 order[3] = GEN_INT (3);
33864 order[elt] = const0_rtx;
33866 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
33867 order[1], order[2], order[3]));
33869 ix86_expand_vector_set (false, target, val, 0);
33871 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
33872 order[1], order[2], order[3]));
33876 /* For SSE1, we have to reuse the V4SF code. */
33877 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
33878 gen_lowpart (SFmode, val), elt);
33883 use_vec_merge = TARGET_SSE2;
33886 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
33890 use_vec_merge = TARGET_SSE4_1;
33897 half_mode = V16QImode;
33903 half_mode = V8HImode;
33909 half_mode = V4SImode;
33915 half_mode = V2DImode;
33921 half_mode = V4SFmode;
33927 half_mode = V2DFmode;
33933 /* Compute offset. */
33937 gcc_assert (i <= 1);
33939 /* Extract the half. */
33940 tmp = gen_reg_rtx (half_mode);
33941 emit_insn (gen_extract[j][i] (tmp, target));
33943 /* Put val in tmp at elt. */
33944 ix86_expand_vector_set (false, tmp, val, elt);
33947 emit_insn (gen_insert[j][i] (target, target, tmp));
33956 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
33957 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
33958 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33962 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
33964 emit_move_insn (mem, target);
33966 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
33967 emit_move_insn (tmp, val);
33969 emit_move_insn (target, mem);
33974 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
33976 enum machine_mode mode = GET_MODE (vec);
33977 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33978 bool use_vec_extr = false;
33991 use_vec_extr = true;
33995 use_vec_extr = TARGET_SSE4_1;
34007 tmp = gen_reg_rtx (mode);
34008 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
34009 GEN_INT (elt), GEN_INT (elt),
34010 GEN_INT (elt+4), GEN_INT (elt+4)));
34014 tmp = gen_reg_rtx (mode);
34015 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
34019 gcc_unreachable ();
34022 use_vec_extr = true;
34027 use_vec_extr = TARGET_SSE4_1;
34041 tmp = gen_reg_rtx (mode);
34042 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
34043 GEN_INT (elt), GEN_INT (elt),
34044 GEN_INT (elt), GEN_INT (elt)));
34048 tmp = gen_reg_rtx (mode);
34049 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
34053 gcc_unreachable ();
34056 use_vec_extr = true;
34061 /* For SSE1, we have to reuse the V4SF code. */
34062 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
34063 gen_lowpart (V4SFmode, vec), elt);
34069 use_vec_extr = TARGET_SSE2;
34072 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
34076 use_vec_extr = TARGET_SSE4_1;
34082 tmp = gen_reg_rtx (V4SFmode);
34084 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
34086 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
34087 ix86_expand_vector_extract (false, target, tmp, elt & 3);
34095 tmp = gen_reg_rtx (V2DFmode);
34097 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
34099 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
34100 ix86_expand_vector_extract (false, target, tmp, elt & 1);
34108 tmp = gen_reg_rtx (V16QImode);
34110 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
34112 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
34113 ix86_expand_vector_extract (false, target, tmp, elt & 15);
34121 tmp = gen_reg_rtx (V8HImode);
34123 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
34125 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
34126 ix86_expand_vector_extract (false, target, tmp, elt & 7);
34134 tmp = gen_reg_rtx (V4SImode);
34136 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
34138 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
34139 ix86_expand_vector_extract (false, target, tmp, elt & 3);
34147 tmp = gen_reg_rtx (V2DImode);
34149 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
34151 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
34152 ix86_expand_vector_extract (false, target, tmp, elt & 1);
34158 /* ??? Could extract the appropriate HImode element and shift. */
34165 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
34166 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
34168 /* Let the rtl optimizers know about the zero extension performed. */
34169 if (inner_mode == QImode || inner_mode == HImode)
34171 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
34172 target = gen_lowpart (SImode, target);
34175 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
34179 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
34181 emit_move_insn (mem, vec);
34183 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
34184 emit_move_insn (target, tmp);
34188 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
34189 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
34190 The upper bits of DEST are undefined, though they shouldn't cause
34191 exceptions (some bits from src or all zeros are ok). */
34194 emit_reduc_half (rtx dest, rtx src, int i)
34197 switch (GET_MODE (src))
34201 tem = gen_sse_movhlps (dest, src, src);
34203 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
34204 GEN_INT (1 + 4), GEN_INT (1 + 4));
34207 tem = gen_vec_interleave_highv2df (dest, src, src);
34213 tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
34214 gen_lowpart (V1TImode, src),
34219 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
34221 tem = gen_avx_shufps256 (dest, src, src,
34222 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
34226 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
34228 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
34235 tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
34236 gen_lowpart (V4DImode, src),
34237 gen_lowpart (V4DImode, src),
34240 tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
34241 gen_lowpart (V2TImode, src),
34245 gcc_unreachable ();
34250 /* Expand a vector reduction. FN is the binary pattern to reduce;
34251 DEST is the destination; IN is the input vector. */
34254 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
34256 rtx half, dst, vec = in;
34257 enum machine_mode mode = GET_MODE (in);
34260 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
34262 && mode == V8HImode
34263 && fn == gen_uminv8hi3)
34265 emit_insn (gen_sse4_1_phminposuw (dest, in));
34269 for (i = GET_MODE_BITSIZE (mode);
34270 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
34273 half = gen_reg_rtx (mode);
34274 emit_reduc_half (half, vec, i);
34275 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
34278 dst = gen_reg_rtx (mode);
34279 emit_insn (fn (dst, half, vec));
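/* Illustrative sketch (an addition, not GCC's own code): the halving
   reduction performed above, shown for four int lanes and addition.
   Each step folds the high half of the active lanes onto the low half,
   so after log2(nelt) steps lane 0 holds the full reduction.
   Hypothetical helper.  */

static int ATTRIBUTE_UNUSED
illustrative_reduce_plus_v4 (const int *in)
{
  int lanes[4];
  int width, i;
  lanes[0] = in[0], lanes[1] = in[1], lanes[2] = in[2], lanes[3] = in[3];
  for (width = 4; width > 1; width /= 2)
    for (i = 0; i < width / 2; i++)
      /* emit_reduc_half brings lane (i + width/2) down; FN combines.  */
      lanes[i] += lanes[i + width / 2];
  return lanes[0];
}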
34284 /* Target hook for scalar_mode_supported_p. */
34286 ix86_scalar_mode_supported_p (enum machine_mode mode)
34288 if (DECIMAL_FLOAT_MODE_P (mode))
34289 return default_decimal_float_supported_p ();
34290 else if (mode == TFmode)
34293 return default_scalar_mode_supported_p (mode);
34296 /* Implements target hook vector_mode_supported_p. */
34298 ix86_vector_mode_supported_p (enum machine_mode mode)
34300 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
34302 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
34304 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
34306 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
34308 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
34313 /* Target hook for c_mode_for_suffix. */
34314 static enum machine_mode
34315 ix86_c_mode_for_suffix (char suffix)
34325 /* Worker function for TARGET_MD_ASM_CLOBBERS.
34327 We do this in the new i386 backend to maintain source compatibility
34328 with the old cc0-based compiler. */
34331 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
34332 tree inputs ATTRIBUTE_UNUSED,
34335 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
34337 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
34342 /* Implements target vector targetm.asm.encode_section_info. */
34344 static void ATTRIBUTE_UNUSED
34345 ix86_encode_section_info (tree decl, rtx rtl, int first)
34347 default_encode_section_info (decl, rtl, first);
34349 if (TREE_CODE (decl) == VAR_DECL
34350 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
34351 && ix86_in_large_data_p (decl))
34352 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
34355 /* Worker function for REVERSE_CONDITION. */
34358 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
34360 return (mode != CCFPmode && mode != CCFPUmode
34361 ? reverse_condition (code)
34362 : reverse_condition_maybe_unordered (code));
34365 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
34369 output_387_reg_move (rtx insn, rtx *operands)
34371 if (REG_P (operands[0]))
34373 if (REG_P (operands[1])
34374 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
34376 if (REGNO (operands[0]) == FIRST_STACK_REG)
34377 return output_387_ffreep (operands, 0);
34378 return "fstp\t%y0";
34380 if (STACK_TOP_P (operands[0]))
34381 return "fld%Z1\t%y1";
34384 else if (MEM_P (operands[0]))
34386 gcc_assert (REG_P (operands[1]));
34387 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
34388 return "fstp%Z0\t%y0";
34391 /* There is no non-popping store to memory for XFmode.
34392 So if we need one, follow the store with a load. */
34393 if (GET_MODE (operands[0]) == XFmode)
34394 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
34396 return "fst%Z0\t%y0";
34403 /* Output code to perform a conditional jump to LABEL, if the C2 flag
34404 in the FP status register is set. */
34407 ix86_emit_fp_unordered_jump (rtx label)
34409 rtx reg = gen_reg_rtx (HImode);
34412 emit_insn (gen_x86_fnstsw_1 (reg));
34414 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
34416 emit_insn (gen_x86_sahf_1 (reg));
34418 temp = gen_rtx_REG (CCmode, FLAGS_REG);
34419 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
34423 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
34425 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
34426 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
34429 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
34430 gen_rtx_LABEL_REF (VOIDmode, label),
34432 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
34434 emit_jump_insn (temp);
34435 predict_jump (REG_BR_PROB_BASE * 10 / 100);
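/* Illustrative note (an addition): the x87 status-word layout this
   relies on.  fnstsw leaves the status word in %ax, so the condition
   bits land in %ah: C0 is bit 0 (0x01), C2 bit 2 (0x04), C3 bit 6
   (0x40).  The test of 0x04 above therefore checks C2 directly, while
   the sahf path copies %ah into EFLAGS, where C2 becomes PF and the
   UNORDERED condition is a plain jp.  */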
34438 /* Output code to perform a log1p XFmode calculation. */
34440 void ix86_emit_i387_log1p (rtx op0, rtx op1)
34442 rtx label1 = gen_label_rtx ();
34443 rtx label2 = gen_label_rtx ();
34445 rtx tmp = gen_reg_rtx (XFmode);
34446 rtx tmp2 = gen_reg_rtx (XFmode);
34449 emit_insn (gen_absxf2 (tmp, op1));
34450 test = gen_rtx_GE (VOIDmode, tmp,
34451 CONST_DOUBLE_FROM_REAL_VALUE (
34452 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
34454 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
34456 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
34457 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
34458 emit_jump (label2);
34460 emit_label (label1);
34461 emit_move_insn (tmp, CONST1_RTX (XFmode));
34462 emit_insn (gen_addxf3 (tmp, op1, tmp));
34463 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
34464 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
34466 emit_label (label2);
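/* Illustrative note (an addition): fyl2xp1 computes y * log2 (1 + x) but
   is only specified for |x| < 1 - sqrt(2)/2 ~= 0.29289, which is exactly
   the threshold tested above.  In range, log1p uses fyl2xp1 directly for
   accuracy near zero; out of range it is safe to form 1 + x explicitly
   and take the plain fyl2x path.  The fldln2 constant supplies the
   y = ln(2) factor that turns log2 into a natural logarithm in both
   branches.  */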
34469 /* Emit code for round calculation. */
34470 void ix86_emit_i387_round (rtx op0, rtx op1)
34472 enum machine_mode inmode = GET_MODE (op1);
34473 enum machine_mode outmode = GET_MODE (op0);
34474 rtx e1, e2, res, tmp, tmp1, half;
34475 rtx scratch = gen_reg_rtx (HImode);
34476 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
34477 rtx jump_label = gen_label_rtx ();
34479 rtx (*gen_abs) (rtx, rtx);
34480 rtx (*gen_neg) (rtx, rtx);
34485 gen_abs = gen_abssf2;
34488 gen_abs = gen_absdf2;
34491 gen_abs = gen_absxf2;
34494 gcc_unreachable ();
34500 gen_neg = gen_negsf2;
34503 gen_neg = gen_negdf2;
34506 gen_neg = gen_negxf2;
34509 gen_neg = gen_neghi2;
34512 gen_neg = gen_negsi2;
34515 gen_neg = gen_negdi2;
34518 gcc_unreachable ();
34521 e1 = gen_reg_rtx (inmode);
34522 e2 = gen_reg_rtx (inmode);
34523 res = gen_reg_rtx (outmode);
34525 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
34527 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
34529 /* scratch = fxam(op1) */
34530 emit_insn (gen_rtx_SET (VOIDmode, scratch,
34531 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
34533 /* e1 = fabs(op1) */
34534 emit_insn (gen_abs (e1, op1));
34536 /* e2 = e1 + 0.5 */
34537 half = force_reg (inmode, half);
34538 emit_insn (gen_rtx_SET (VOIDmode, e2,
34539 gen_rtx_PLUS (inmode, e1, half)));
34541 /* res = floor(e2) */
34542 if (inmode != XFmode)
34544 tmp1 = gen_reg_rtx (XFmode);
34546 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
34547 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
34557 rtx tmp0 = gen_reg_rtx (XFmode);
34559 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
34561 emit_insn (gen_rtx_SET (VOIDmode, res,
34562 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
34563 UNSPEC_TRUNC_NOOP)));
34567 emit_insn (gen_frndintxf2_floor (res, tmp1));
34570 emit_insn (gen_lfloorxfhi2 (res, tmp1));
34573 emit_insn (gen_lfloorxfsi2 (res, tmp1));
34576 emit_insn (gen_lfloorxfdi2 (res, tmp1));
34579 gcc_unreachable ();
34582 /* flags = signbit(a) */
34583 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
34585 /* if (flags) then res = -res */
34586 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
34587 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
34588 gen_rtx_LABEL_REF (VOIDmode, jump_label),
34590 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
34591 predict_jump (REG_BR_PROB_BASE * 50 / 100);
34592 JUMP_LABEL (insn) = jump_label;
34594 emit_insn (gen_neg (res, res));
34596 emit_label (jump_label);
34597 LABEL_NUSES (jump_label) = 1;
34599 emit_move_insn (op0, res);
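/* Illustrative sketch (an addition, not GCC's own code): the arithmetic
   the sequence above performs, ignoring the sign of zero (which the real
   code preserves via fxam's C1 bit) and assuming the magnitude fits the
   integer conversion.  Halfway cases round away from zero, matching the
   C round () function.  Hypothetical helper.  */

static double ATTRIBUTE_UNUSED
illustrative_i387_round (double a)
{
  double e1 = a < 0.0 ? -a : a;		/* e1 = fabs (a) via gen_abs.  */
  double e2 = e1 + 0.5;			/* e2 = e1 + 0.5.  */
  double res = (double) (long long) e2;	/* floor, since e2 >= 0.  */
  return a < 0.0 ? -res : res;		/* Sign restored via gen_neg.  */
}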
34602 /* Output code to perform a Newton-Raphson approximation of a single precision
34603 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
34605 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
34607 rtx x0, x1, e0, e1;
34609 x0 = gen_reg_rtx (mode);
34610 e0 = gen_reg_rtx (mode);
34611 e1 = gen_reg_rtx (mode);
34612 x1 = gen_reg_rtx (mode);
34614 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
34616 b = force_reg (mode, b);
34618 /* x0 = rcp(b) estimate */
34619 emit_insn (gen_rtx_SET (VOIDmode, x0,
34620 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
34623 emit_insn (gen_rtx_SET (VOIDmode, e0,
34624 gen_rtx_MULT (mode, x0, b)));
34627 emit_insn (gen_rtx_SET (VOIDmode, e0,
34628 gen_rtx_MULT (mode, x0, e0)));
34631 emit_insn (gen_rtx_SET (VOIDmode, e1,
34632 gen_rtx_PLUS (mode, x0, x0)));
34635 emit_insn (gen_rtx_SET (VOIDmode, x1,
34636 gen_rtx_MINUS (mode, e1, e0)));
34639 emit_insn (gen_rtx_SET (VOIDmode, res,
34640 gen_rtx_MULT (mode, a, x1)));
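/* Illustrative sketch (an addition, not GCC's own code): the same
   Newton-Raphson step in scalar C.  With x0 ~= 1/b from the roughly
   11-bit rcpss estimate, x1 = 2*x0 - b*x0*x0 about doubles the number
   of correct bits.  RCP_ESTIMATE stands in for the rcpss result; the
   helper name is hypothetical.  */

static float ATTRIBUTE_UNUSED
illustrative_swdiv (float a, float b, float rcp_estimate)
{
  float x0 = rcp_estimate;
  float e0 = (x0 * b) * x0;	/* b * rcp(b) * rcp(b).  */
  float e1 = x0 + x0;		/* rcp(b) + rcp(b).  */
  float x1 = e1 - e0;		/* Refined reciprocal of b.  */
  return a * x1;
}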
34643 /* Output code to perform a Newton-Raphson approximation of a
34644 single precision floating point [reciprocal] square root. */
34646 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
34649 rtx x0, e0, e1, e2, e3, mthree, mhalf;
34652 x0 = gen_reg_rtx (mode);
34653 e0 = gen_reg_rtx (mode);
34654 e1 = gen_reg_rtx (mode);
34655 e2 = gen_reg_rtx (mode);
34656 e3 = gen_reg_rtx (mode);
34658 real_from_integer (&r, VOIDmode, -3, -1, 0);
34659 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
34661 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
34662 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
34664 if (VECTOR_MODE_P (mode))
34666 mthree = ix86_build_const_vector (mode, true, mthree);
34667 mhalf = ix86_build_const_vector (mode, true, mhalf);
34670 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
34671 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
34673 a = force_reg (mode, a);
34675 /* x0 = rsqrt(a) estimate */
34676 emit_insn (gen_rtx_SET (VOIDmode, x0,
34677 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
34680 /* If a == 0.0, mask out the infinite rsqrt estimate so that sqrt(0.0) yields 0.0 rather than 0 * inf = NaN. */
34685 zero = gen_reg_rtx (mode);
34686 mask = gen_reg_rtx (mode);
34688 zero = force_reg (mode, CONST0_RTX(mode));
34689 emit_insn (gen_rtx_SET (VOIDmode, mask,
34690 gen_rtx_NE (mode, zero, a)));
34692 emit_insn (gen_rtx_SET (VOIDmode, x0,
34693 gen_rtx_AND (mode, x0, mask)));
34697 emit_insn (gen_rtx_SET (VOIDmode, e0,
34698 gen_rtx_MULT (mode, x0, a)));
34700 emit_insn (gen_rtx_SET (VOIDmode, e1,
34701 gen_rtx_MULT (mode, e0, x0)));
34704 mthree = force_reg (mode, mthree);
34705 emit_insn (gen_rtx_SET (VOIDmode, e2,
34706 gen_rtx_PLUS (mode, e1, mthree)));
34708 mhalf = force_reg (mode, mhalf);
34710 /* e3 = -.5 * x0 */
34711 emit_insn (gen_rtx_SET (VOIDmode, e3,
34712 gen_rtx_MULT (mode, x0, mhalf)));
34714 /* e3 = -.5 * e0 */
34715 emit_insn (gen_rtx_SET (VOIDmode, e3,
34716 gen_rtx_MULT (mode, e0, mhalf)));
34717 /* ret = e2 * e3 */
34718 emit_insn (gen_rtx_SET (VOIDmode, res,
34719 gen_rtx_MULT (mode, e2, e3)));
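/* Illustrative sketch (an addition, not GCC's own code): the rsqrt
   refinement above in scalar C.  With x0 ~= 1/sqrt(a), a*x0*x0 is close
   to 1, so -0.5 * (a*x0*x0 - 3) is close to 1 and multiplying it by x0
   (or by a*x0 for the sqrt variant) sharpens the estimate.
   RSQRT_ESTIMATE stands in for the rsqrtss result; hypothetical
   helper.  */

static float ATTRIBUTE_UNUSED
illustrative_swsqrt (float a, float rsqrt_estimate, int recip)
{
  float x0 = rsqrt_estimate;
  float e0 = x0 * a;			/* a * x0.  */
  float e1 = e0 * x0;			/* a * x0 * x0.  */
  float e2 = e1 - 3.0f;			/* e1 + mthree.  */
  float e3 = (recip ? x0 : e0) * -0.5f;	/* -.5 * x0 or -.5 * e0.  */
  return e2 * e3;			/* rsqrt(a) or sqrt(a).  */
}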
34722 #ifdef TARGET_SOLARIS
34723 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
34726 i386_solaris_elf_named_section (const char *name, unsigned int flags,
34729 /* With Binutils 2.15, the "@unwind" marker must be specified on
34730 every occurrence of the ".eh_frame" section, not just the first one. */
34733 && strcmp (name, ".eh_frame") == 0)
34735 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
34736 flags & SECTION_WRITE ? "aw" : "a");
34741 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
34743 solaris_elf_asm_comdat_section (name, flags, decl);
34748 default_elf_asm_named_section (name, flags, decl);
34750 #endif /* TARGET_SOLARIS */
34752 /* Return the mangling of TYPE if it is an extended fundamental type. */
34754 static const char *
34755 ix86_mangle_type (const_tree type)
34757 type = TYPE_MAIN_VARIANT (type);
34759 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
34760 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
34763 switch (TYPE_MODE (type))
34766 /* __float128 is "g". */
34769 /* "long double" or __float80 is "e". */
34776 /* For 32-bit code we can save the PIC register setup by using the
34777 __stack_chk_fail_local hidden function instead of calling
34778 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
34779 register, so it is better to call __stack_chk_fail directly. */
34781 static tree ATTRIBUTE_UNUSED
34782 ix86_stack_protect_fail (void)
34784 return TARGET_64BIT
34785 ? default_external_stack_protect_fail ()
34786 : default_hidden_stack_protect_fail ();
34789 /* Select a format to encode pointers in exception handling data. CODE
34790 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
34791 true if the symbol may be affected by dynamic relocations.
34793 ??? All x86 object file formats are capable of representing this.
34794 After all, the relocation needed is the same as for the call insn.
34795 Whether or not a particular assembler allows us to enter such, I
34796 guess we'll have to see. */
34798 asm_preferred_eh_data_format (int code, int global)
34802 int type = DW_EH_PE_sdata8;
34804 || ix86_cmodel == CM_SMALL_PIC
34805 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
34806 type = DW_EH_PE_sdata4;
34807 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
34809 if (ix86_cmodel == CM_SMALL
34810 || (ix86_cmodel == CM_MEDIUM && code))
34811 return DW_EH_PE_udata4;
34812 return DW_EH_PE_absptr;
34815 /* Expand copysign from SIGN to the positive value ABS_VALUE
34816 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
34819 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
34821 enum machine_mode mode = GET_MODE (sign);
34822 rtx sgn = gen_reg_rtx (mode);
34823 if (mask == NULL_RTX)
34825 enum machine_mode vmode;
34827 if (mode == SFmode)
34829 else if (mode == DFmode)
34834 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
34835 if (!VECTOR_MODE_P (mode))
34837 /* We need to generate a scalar mode mask in this case. */
34838 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
34839 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
34840 mask = gen_reg_rtx (mode);
34841 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
34845 mask = gen_rtx_NOT (mode, mask);
34846 emit_insn (gen_rtx_SET (VOIDmode, sgn,
34847 gen_rtx_AND (mode, mask, sign)));
34848 emit_insn (gen_rtx_SET (VOIDmode, result,
34849 gen_rtx_IOR (mode, abs_value, sgn)));
34852 /* Expand fabs (OP0) and return a new rtx that holds the result. The
34853 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
34856 ix86_expand_sse_fabs (rtx op0, rtx *smask)
34858 enum machine_mode vmode, mode = GET_MODE (op0);
34861 xa = gen_reg_rtx (mode);
34862 if (mode == SFmode)
34864 else if (mode == DFmode)
34868 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
34869 if (!VECTOR_MODE_P (mode))
34871 /* We need to generate a scalar mode mask in this case. */
34872 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
34873 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
34874 mask = gen_reg_rtx (mode);
34875 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
34877 emit_insn (gen_rtx_SET (VOIDmode, xa,
34878 gen_rtx_AND (mode, op0, mask)));
34886 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
34887 swapping the operands if SWAP_OPERANDS is true. The expanded
34888 code is a forward jump to a newly created label in case the
34889 comparison is true. The generated label rtx is returned. */
34891 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
34892 bool swap_operands)
34903 label = gen_label_rtx ();
34904 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
34905 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34906 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
34907 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
34908 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
34909 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
34910 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
34911 JUMP_LABEL (tmp) = label;
34916 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
34917 using comparison code CODE. Operands are swapped for the comparison if
34918 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
34920 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
34921 bool swap_operands)
34923 rtx (*insn)(rtx, rtx, rtx, rtx);
34924 enum machine_mode mode = GET_MODE (op0);
34925 rtx mask = gen_reg_rtx (mode);
34934 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
34936 emit_insn (insn (mask, op0, op1,
34937 gen_rtx_fmt_ee (code, mode, op0, op1)));
34941 /* Generate and return a rtx of mode MODE for 2**n where n is the number
34942 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
34944 ix86_gen_TWO52 (enum machine_mode mode)
34946 REAL_VALUE_TYPE TWO52r;
34949 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
34950 TWO52 = const_double_from_real_value (TWO52r, mode);
34951 TWO52 = force_reg (mode, TWO52);
34956 /* Expand SSE sequence for computing lround from OP1 storing into OP0. */
34959 ix86_expand_lround (rtx op0, rtx op1)
34961 /* C code for the stuff we're doing below:
34962 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
return (long)tmp;  */
34965 enum machine_mode mode = GET_MODE (op1);
34966 const struct real_format *fmt;
34967 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
34970 /* load nextafter (0.5, 0.0) */
34971 fmt = REAL_MODE_FORMAT (mode);
34972 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
34973 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
34975 /* adj = copysign (0.5, op1) */
34976 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
34977 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
34979 /* adj = op1 + adj */
34980 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
34982 /* op0 = (imode)adj */
34983 expand_fix (op0, adj, 0);
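/* Illustrative note (an addition): PRED_HALF is nextafter (0.5, 0.0),
   i.e. 0.5 - 2**-54 for doubles.  Adding exactly 0.5 can itself round
   up: for the largest double below 0.5, x + 0.5 rounds to 1.0, so
   lround would return 1 instead of 0.  With the slightly smaller
   constant, the truncating conversion gives round-half-away-from-zero
   without that double-rounding error.  */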
34986 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing into OPERAND0. */
34989 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
34991 /* C code for the stuff we're doing below (for do_floor):
xi = (long)op1;
34993 xi -= (double)xi > op1 ? 1 : 0;
return xi;  */
34996 enum machine_mode fmode = GET_MODE (op1);
34997 enum machine_mode imode = GET_MODE (op0);
34998 rtx ireg, freg, label, tmp;
35000 /* reg = (long)op1 */
35001 ireg = gen_reg_rtx (imode);
35002 expand_fix (ireg, op1, 0);
35004 /* freg = (double)reg */
35005 freg = gen_reg_rtx (fmode);
35006 expand_float (freg, ireg, 0);
35008 /* ireg = (freg > op1) ? ireg - 1 : ireg */
35009 label = ix86_expand_sse_compare_and_jump (UNLE,
35010 freg, op1, !do_floor);
35011 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
35012 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
35013 emit_move_insn (ireg, tmp);
35015 emit_label (label);
35016 LABEL_NUSES (label) = 1;
35018 emit_move_insn (op0, ireg);
35021 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
35022 result in OPERAND0. */
35024 ix86_expand_rint (rtx operand0, rtx operand1)
35026 /* C code for the stuff we're doing below:
35027 xa = fabs (operand1);
35028 if (!isless (xa, 2**52))
return operand1;
35030 xa = xa + 2**52 - 2**52;
35031 return copysign (xa, operand1);  */
35033 enum machine_mode mode = GET_MODE (operand0);
35034 rtx res, xa, label, TWO52, mask;
35036 res = gen_reg_rtx (mode);
35037 emit_move_insn (res, operand1);
35039 /* xa = abs (operand1) */
35040 xa = ix86_expand_sse_fabs (res, &mask);
35042 /* if (!isless (xa, TWO52)) goto label; */
35043 TWO52 = ix86_gen_TWO52 (mode);
35044 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35046 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35047 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
35049 ix86_sse_copysign_to_positive (res, xa, res, mask);
35051 emit_label (label);
35052 LABEL_NUSES (label) = 1;
35054 emit_move_insn (operand0, res);
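/* Illustrative sketch (an addition, not GCC's own code): the TWO52 trick
   used above.  Doubles at or above 2**52 have no fractional bits, so for
   0 <= xa < 2**52 the addition xa + 2**52 rounds to the nearest integer
   in the current (round-to-nearest-even) mode, and the subtraction then
   recovers that integer exactly.  The volatile keeps a compiler from
   folding the pair of operations away.  Hypothetical helper.  */

static double ATTRIBUTE_UNUSED
illustrative_two52_rint (double xa)
{
  const double two52 = 4503599627370496.0;	/* 2**52.  */
  volatile double tmp;
  if (!(xa < two52))
    return xa;			/* Already integral (or NaN).  */
  tmp = xa + two52;
  return tmp - two52;
}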
35057 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0. */
35060 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
35062 /* C code for the stuff we expand below.
35063 double xa = fabs (x), x2;
35064 if (!isless (xa, TWO52))
35066 xa = xa + TWO52 - TWO52;
35067 x2 = copysign (xa, x);
Compensate:  floor: if (x2 > x) x2 -= 1;  ceil: if (x2 < x) x2 += 1;
return x2;  */
35076 enum machine_mode mode = GET_MODE (operand0);
35077 rtx xa, TWO52, tmp, label, one, res, mask;
35079 TWO52 = ix86_gen_TWO52 (mode);
35081 /* Temporary for holding the result, initialized to the input
35082 operand to ease control flow. */
35083 res = gen_reg_rtx (mode);
35084 emit_move_insn (res, operand1);
35086 /* xa = abs (operand1) */
35087 xa = ix86_expand_sse_fabs (res, &mask);
35089 /* if (!isless (xa, TWO52)) goto label; */
35090 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35092 /* xa = xa + TWO52 - TWO52; */
35093 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35094 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
35096 /* xa = copysign (xa, operand1) */
35097 ix86_sse_copysign_to_positive (xa, xa, res, mask);
35099 /* generate 1.0 or -1.0 */
35100 one = force_reg (mode,
35101 const_double_from_real_value (do_floor
35102 ? dconst1 : dconstm1, mode));
35104 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35105 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
35106 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35107 gen_rtx_AND (mode, one, tmp)));
35108 /* We always need to subtract here to preserve signed zero. */
35109 tmp = expand_simple_binop (mode, MINUS,
35110 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35111 emit_move_insn (res, tmp);
35113 emit_label (label);
35114 LABEL_NUSES (label) = 1;
35116 emit_move_insn (operand0, res);
35119 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0. */
35122 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
35124 /* C code for the stuff we expand below.
35125 double xa = fabs (x), x2;
35126 if (!isless (xa, TWO52))
35128 x2 = (double)(long)x;
35135 if (HONOR_SIGNED_ZEROS (mode))
35136 return copysign (x2, x);
return x2;  */
35139 enum machine_mode mode = GET_MODE (operand0);
35140 rtx xa, xi, TWO52, tmp, label, one, res, mask;
35142 TWO52 = ix86_gen_TWO52 (mode);
35144 /* Temporary for holding the result, initialized to the input
35145 operand to ease control flow. */
35146 res = gen_reg_rtx (mode);
35147 emit_move_insn (res, operand1);
35149 /* xa = abs (operand1) */
35150 xa = ix86_expand_sse_fabs (res, &mask);
35152 /* if (!isless (xa, TWO52)) goto label; */
35153 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35155 /* xa = (double)(long)x */
35156 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35157 expand_fix (xi, res, 0);
35158 expand_float (xa, xi, 0);
35161 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
35163 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35164 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
35165 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35166 gen_rtx_AND (mode, one, tmp)));
35167 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
35168 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35169 emit_move_insn (res, tmp);
35171 if (HONOR_SIGNED_ZEROS (mode))
35172 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
35174 emit_label (label);
35175 LABEL_NUSES (label) = 1;
35177 emit_move_insn (operand0, res);
35180 /* Expand SSE sequence for computing round from OPERAND1 storing
35181 into OPERAND0. Sequence that works without relying on DImode truncation
35182 via cvttsd2siq that is only available on 64bit targets. */
35184 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
35186 /* C code for the stuff we expand below.
35187 double xa = fabs (x), xa2, x2;
35188 if (!isless (xa, TWO52))
35190 Using the absolute value and copying back sign makes
35191 -0.0 -> -0.0 correct.
35192 xa2 = xa + TWO52 - TWO52;
35197 else if (dxa > 0.5)
35199 x2 = copysign (xa2, x);
return x2;  */
35202 enum machine_mode mode = GET_MODE (operand0);
35203 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
35205 TWO52 = ix86_gen_TWO52 (mode);
35207 /* Temporary for holding the result, initialized to the input
35208 operand to ease control flow. */
35209 res = gen_reg_rtx (mode);
35210 emit_move_insn (res, operand1);
35212 /* xa = abs (operand1) */
35213 xa = ix86_expand_sse_fabs (res, &mask);
35215 /* if (!isless (xa, TWO52)) goto label; */
35216 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35218 /* xa2 = xa + TWO52 - TWO52; */
35219 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35220 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
35222 /* dxa = xa2 - xa; */
35223 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
35225 /* generate 0.5, 1.0 and -0.5 */
35226 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
35227 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
35228 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
35232 tmp = gen_reg_rtx (mode);
35233 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
35234 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
35235 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35236 gen_rtx_AND (mode, one, tmp)));
35237 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35238 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
35239 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
35240 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35241 gen_rtx_AND (mode, one, tmp)));
35242 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35244 /* res = copysign (xa2, operand1) */
35245 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
35247 emit_label (label);
35248 LABEL_NUSES (label) = 1;
35250 emit_move_insn (operand0, res);
35253 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0. */
35256 ix86_expand_trunc (rtx operand0, rtx operand1)
35258 /* C code for SSE variant we expand below.
35259 double xa = fabs (x), x2;
35260 if (!isless (xa, TWO52))
35262 x2 = (double)(long)x;
35263 if (HONOR_SIGNED_ZEROS (mode))
35264 return copysign (x2, x);
return x2;  */
35267 enum machine_mode mode = GET_MODE (operand0);
35268 rtx xa, xi, TWO52, label, res, mask;
35270 TWO52 = ix86_gen_TWO52 (mode);
35272 /* Temporary for holding the result, initialized to the input
35273 operand to ease control flow. */
35274 res = gen_reg_rtx (mode);
35275 emit_move_insn (res, operand1);
35277 /* xa = abs (operand1) */
35278 xa = ix86_expand_sse_fabs (res, &mask);
35280 /* if (!isless (xa, TWO52)) goto label; */
35281 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35283 /* x = (double)(long)x */
35284 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35285 expand_fix (xi, res, 0);
35286 expand_float (res, xi, 0);
35288 if (HONOR_SIGNED_ZEROS (mode))
35289 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
35291 emit_label (label);
35292 LABEL_NUSES (label) = 1;
35294 emit_move_insn (operand0, res);
35297 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0. */
35300 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
35302 enum machine_mode mode = GET_MODE (operand0);
35303 rtx xa, mask, TWO52, label, one, res, smask, tmp;
35305 /* C code for SSE variant we expand below.
35306 double xa = fabs (x), x2;
35307 if (!isless (xa, TWO52))
35309 xa2 = xa + TWO52 - TWO52;
35313 x2 = copysign (xa2, x);
return x2;  */
35317 TWO52 = ix86_gen_TWO52 (mode);
35319 /* Temporary for holding the result, initialized to the input
35320 operand to ease control flow. */
35321 res = gen_reg_rtx (mode);
35322 emit_move_insn (res, operand1);
35324 /* xa = abs (operand1) */
35325 xa = ix86_expand_sse_fabs (res, &smask);
35327 /* if (!isless (xa, TWO52)) goto label; */
35328 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35330 /* res = xa + TWO52 - TWO52; */
35331 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35332 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
35333 emit_move_insn (res, tmp);
35336 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
35338 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
35339 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
35340 emit_insn (gen_rtx_SET (VOIDmode, mask,
35341 gen_rtx_AND (mode, mask, one)));
35342 tmp = expand_simple_binop (mode, MINUS,
35343 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
35344 emit_move_insn (res, tmp);
35346 /* res = copysign (res, operand1) */
35347 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
35349 emit_label (label);
35350 LABEL_NUSES (label) = 1;
35352 emit_move_insn (operand0, res);
35355 /* Expand SSE sequence for computing round from OPERAND1 storing into OPERAND0. */
35358 ix86_expand_round (rtx operand0, rtx operand1)
35360 /* C code for the stuff we're doing below:
35361 double xa = fabs (x);
35362 if (!isless (xa, TWO52))
35364 xa = (double)(long)(xa + nextafter (0.5, 0.0));
35365 return copysign (xa, x);  */
35367 enum machine_mode mode = GET_MODE (operand0);
35368 rtx res, TWO52, xa, label, xi, half, mask;
35369 const struct real_format *fmt;
35370 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35372 /* Temporary for holding the result, initialized to the input
35373 operand to ease control flow. */
35374 res = gen_reg_rtx (mode);
35375 emit_move_insn (res, operand1);
35377 TWO52 = ix86_gen_TWO52 (mode);
35378 xa = ix86_expand_sse_fabs (res, &mask);
35379 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35381 /* load nextafter (0.5, 0.0) */
35382 fmt = REAL_MODE_FORMAT (mode);
35383 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35384 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35386 /* xa = xa + 0.5 */
35387 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
35388 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
35390 /* xa = (double)(int64_t)xa */
35391 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35392 expand_fix (xi, xa, 0);
35393 expand_float (xa, xi, 0);
35395 /* res = copysign (xa, operand1) */
35396 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
35398 emit_label (label);
35399 LABEL_NUSES (label) = 1;
35401 emit_move_insn (operand0, res);
35404 /* Expand SSE sequence for computing round
35405 from OP1 storing into OP0 using sse4 round insn. */
35407 ix86_expand_round_sse4 (rtx op0, rtx op1)
35409 enum machine_mode mode = GET_MODE (op0);
35410 rtx e1, e2, res, half;
35411 const struct real_format *fmt;
35412 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35413 rtx (*gen_copysign) (rtx, rtx, rtx);
35414 rtx (*gen_round) (rtx, rtx, rtx);
35419 gen_copysign = gen_copysignsf3;
35420 gen_round = gen_sse4_1_roundsf2;
35423 gen_copysign = gen_copysigndf3;
35424 gen_round = gen_sse4_1_rounddf2;
35427 gcc_unreachable ();
35430 /* round (a) = trunc (a + copysign (0.5, a)) */
35432 /* load nextafter (0.5, 0.0) */
35433 fmt = REAL_MODE_FORMAT (mode);
35434 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35435 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35436 half = const_double_from_real_value (pred_half, mode);
35438 /* e1 = copysign (0.5, op1) */
35439 e1 = gen_reg_rtx (mode);
35440 emit_insn (gen_copysign (e1, half, op1));
35442 /* e2 = op1 + e1 */
35443 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
35445 /* res = trunc (e2) */
35446 res = gen_reg_rtx (mode);
35447 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
35449 emit_move_insn (op0, res);
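/* Illustrative note (an addition): as in ix86_expand_lround, the constant
   added before truncating is nextafter (0.5, 0.0) rather than 0.5, so
   trunc (a + copysign (0.5 - 2**-54, a)) rounds halfway cases away from
   zero without the addition itself rounding a just-below-half value up
   to the next integer.  */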
35453 /* Table of valid machine attributes. */
35454 static const struct attribute_spec ix86_attribute_table[] =
35456 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
35457 affects_type_identity } */
35458 /* Stdcall attribute says callee is responsible for popping arguments
35459 if they are not variable. */
35460 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35462 /* Fastcall attribute says callee is responsible for popping arguments
35463 if they are not variable. */
35464 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35466 /* Thiscall attribute says callee is responsible for popping arguments
35467 if they are not variable. */
35468 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35470 /* Cdecl attribute says the callee is a normal C declaration */
35471 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35473 /* Regparm attribute specifies how many integer arguments are to be
35474 passed in registers. */
35475 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
35477 /* Sseregparm attribute says we are using x86_64 calling conventions
35478 for FP arguments. */
35479 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35481 /* The transactional memory builtins are implicitly regparm or fastcall
35482 depending on the ABI. Override the generic do-nothing attribute that
35483 these builtins were declared with. */
35484 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
35486 /* force_align_arg_pointer says this function realigns the stack at entry. */
35487 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
35488 false, true, true, ix86_handle_cconv_attribute, false },
35489 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35490 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
35491 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
35492 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
35495 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
35497 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
35499 #ifdef SUBTARGET_ATTRIBUTE_TABLE
35500 SUBTARGET_ATTRIBUTE_TABLE,
35502 /* ms_abi and sysv_abi calling convention function attributes. */
35503 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
35504 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
35505 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
35507 { "callee_pop_aggregate_return", 1, 1, false, true, true,
35508 ix86_handle_callee_pop_aggregate_return, true },
35510 { NULL, 0, 0, false, false, false, NULL, false }
35513 /* Implement targetm.vectorize.builtin_vectorization_cost. */
35515 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
35516 tree vectype ATTRIBUTE_UNUSED,
35517 int misalign ATTRIBUTE_UNUSED)
35519 switch (type_of_cost)
35522 return ix86_cost->scalar_stmt_cost;
35525 return ix86_cost->scalar_load_cost;
35528 return ix86_cost->scalar_store_cost;
35531 return ix86_cost->vec_stmt_cost;
35534 return ix86_cost->vec_align_load_cost;
35537 return ix86_cost->vec_store_cost;
35539 case vec_to_scalar:
35540 return ix86_cost->vec_to_scalar_cost;
35542 case scalar_to_vec:
35543 return ix86_cost->scalar_to_vec_cost;
35545 case unaligned_load:
35546 case unaligned_store:
35547 return ix86_cost->vec_unalign_load_cost;
35549 case cond_branch_taken:
35550 return ix86_cost->cond_taken_branch_cost;
35552 case cond_branch_not_taken:
35553 return ix86_cost->cond_not_taken_branch_cost;
35556 case vec_promote_demote:
35557 return ix86_cost->vec_stmt_cost;
35560 gcc_unreachable ();
35564 /* Construct (set target (vec_select op0 (parallel perm))) and
35565 return true if that's a valid instruction in the active ISA. */
35568 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
35570 rtx rperm[MAX_VECT_LEN], x;
35573 for (i = 0; i < nelt; ++i)
35574 rperm[i] = GEN_INT (perm[i]);
35576 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
35577 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
35578 x = gen_rtx_SET (VOIDmode, target, x);
35581 if (recog_memoized (x) < 0)
35589 /* Similar, but generate a vec_concat from op0 and op1 as well. */
35592 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
35593 const unsigned char *perm, unsigned nelt)
35595 enum machine_mode v2mode;
35598 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
35599 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
35600 return expand_vselect (target, x, perm, nelt);
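/* Editor's illustration: interleaving the low halves of two V4SI
   registers through this helper yields RTL of the shape

     (set (reg:V4SI dest)
          (vec_select:V4SI
            (vec_concat:V8SI (reg:V4SI a) (reg:V4SI b))
            (parallel [(const_int 0) (const_int 4)
                       (const_int 1) (const_int 5)])))

   which recog_memoized should accept as punpckldq when SSE2 is
   enabled.  */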
35603 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35604 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
35607 expand_vec_perm_blend (struct expand_vec_perm_d *d)
35609 enum machine_mode vmode = d->vmode;
35610 unsigned i, mask, nelt = d->nelt;
35611 rtx target, op0, op1, x;
35612 rtx rperm[32], vperm;
35614 if (d->op0 == d->op1)
35616 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
35618 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
35620 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
35625 /* This is a blend, not a permute. Elements must stay in their
35626 respective lanes. */
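/* Editor's example: for V4SImode, { 0, 5, 2, 7 } is a blend (element i
   always comes from slot i of op0 or of op1, giving mask 0b1010),
   whereas { 1, 4, 2, 7 } is not, since element 0 would have to move
   across slots.  */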
35627 for (i = 0; i < nelt; ++i)
35629 unsigned e = d->perm[i];
35630 if (!(e == i || e == i + nelt))
35637 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
35638 decision should be extracted elsewhere, so that we only try that
35639 sequence once all budget==3 options have been tried. */
35640 target = d->target;
35653 for (i = 0; i < nelt; ++i)
35654 mask |= (d->perm[i] >= nelt) << i;
35658 for (i = 0; i < 2; ++i)
35659 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
35664 for (i = 0; i < 4; ++i)
35665 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
35670 /* See if bytes move in pairs so we can use pblendw with
35671 an immediate argument, rather than pblendvb with a vector argument.  */
35673 for (i = 0; i < 16; i += 2)
35674 if (d->perm[i] + 1 != d->perm[i + 1])
35677 for (i = 0; i < nelt; ++i)
35678 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
35681 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
35682 vperm = force_reg (vmode, vperm);
35684 if (GET_MODE_SIZE (vmode) == 16)
35685 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
35687 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
35691 for (i = 0; i < 8; ++i)
35692 mask |= (d->perm[i * 2] >= 16) << i;
35697 target = gen_lowpart (vmode, target);
35698 op0 = gen_lowpart (vmode, op0);
35699 op1 = gen_lowpart (vmode, op1);
35703 /* See if bytes move in pairs. If not, vpblendvb must be used. */
35704 for (i = 0; i < 32; i += 2)
35705 if (d->perm[i] + 1 != d->perm[i + 1])
35707 /* See if bytes move in quadruplets. If yes, vpblendd
35708 with immediate can be used. */
35709 for (i = 0; i < 32; i += 4)
35710 if (d->perm[i] + 2 != d->perm[i + 2])
35714 /* See if bytes move the same in both lanes. If yes,
35715 vpblendw with immediate can be used. */
35716 for (i = 0; i < 16; i += 2)
35717 if (d->perm[i] + 16 != d->perm[i + 16])
35720 /* Use vpblendw. */
35721 for (i = 0; i < 16; ++i)
35722 mask |= (d->perm[i * 2] >= 32) << i;
35727 /* Use vpblendd. */
35728 for (i = 0; i < 8; ++i)
35729 mask |= (d->perm[i * 4] >= 32) << i;
35734 /* See if words move in pairs. If yes, vpblendd can be used. */
35735 for (i = 0; i < 16; i += 2)
35736 if (d->perm[i] + 1 != d->perm[i + 1])
35740 /* See if words move the same in both lanes. If not,
35741 vpblendvb must be used. */
35742 for (i = 0; i < 8; i++)
35743 if (d->perm[i] + 8 != d->perm[i + 8])
35745 /* Use vpblendvb. */
35746 for (i = 0; i < 32; ++i)
35747 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
35751 target = gen_lowpart (vmode, target);
35752 op0 = gen_lowpart (vmode, op0);
35753 op1 = gen_lowpart (vmode, op1);
35754 goto finish_pblendvb;
35757 /* Use vpblendw. */
35758 for (i = 0; i < 16; ++i)
35759 mask |= (d->perm[i] >= 16) << i;
35763 /* Use vpblendd. */
35764 for (i = 0; i < 8; ++i)
35765 mask |= (d->perm[i * 2] >= 16) << i;
35770 /* Use vpblendd. */
35771 for (i = 0; i < 4; ++i)
35772 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
35777 gcc_unreachable ();
35780 /* This matches five different patterns with the different modes. */
35781 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
35782 x = gen_rtx_SET (VOIDmode, target, x);
35788 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35789 in terms of the variable form of vpermilps.
35791 Note that we will have already failed the immediate input vpermilps,
35792 which requires that the high and low part shuffle be identical; the
35793 variable form doesn't require that. */
35796 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
35798 rtx rperm[8], vperm;
35801 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
35804 /* We can only permute within the 128-bit lane. */
35805 for (i = 0; i < 8; ++i)
35807 unsigned e = d->perm[i];
35808 if (i < 4 ? e >= 4 : e < 4)
35815 for (i = 0; i < 8; ++i)
35817 unsigned e = d->perm[i];
35819 /* Within each 128-bit lane, the elements of op0 are numbered
35820 from 0 and the elements of op1 are numbered from 4. */
35826 rperm[i] = GEN_INT (e);
35829 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
35830 vperm = force_reg (V8SImode, vperm);
35831 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
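/* Editor's example: the variable vpermilps control holds a per-element
   selector whose low 2 bits index within that element's own 128-bit
   lane.  { 3 2 1 0 7 6 5 4 } reverses each lane and is accepted here;
   { 4 5 6 7 0 1 2 3 } would cross lanes and is rejected above.  */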
35836 /* Return true if permutation D can be performed as VMODE permutation instead.  */
35840 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
35842 unsigned int i, j, chunk;
35844 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
35845 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
35846 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
35849 if (GET_MODE_NUNITS (vmode) >= d->nelt)
35852 chunk = d->nelt / GET_MODE_NUNITS (vmode);
35853 for (i = 0; i < d->nelt; i += chunk)
35854 if (d->perm[i] & (chunk - 1))
35857 for (j = 1; j < chunk; ++j)
35858 if (d->perm[i] + j != d->perm[i + j])
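/* Editor's example: the V16QImode permutation { 4 5 6 7 0 1 2 3
   12 13 14 15 8 9 10 11 } moves aligned 4-byte chunks as units, so it
   is also valid as the V4SImode permutation { 1 0 3 2 }; a permutation
   that splits a chunk fails one of the two tests above.  */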
35864 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35865 in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128. */
35868 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
35870 unsigned i, nelt, eltsz, mask;
35871 unsigned char perm[32];
35872 enum machine_mode vmode = V16QImode;
35873 rtx rperm[32], vperm, target, op0, op1;
35877 if (d->op0 != d->op1)
35879 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
35882 && valid_perm_using_mode_p (V2TImode, d))
35887 /* Use vperm2i128 insn. The pattern uses
35888 V4DImode instead of V2TImode. */
35889 target = gen_lowpart (V4DImode, d->target);
35890 op0 = gen_lowpart (V4DImode, d->op0);
35891 op1 = gen_lowpart (V4DImode, d->op1);
35893 = GEN_INT ((d->perm[0] / (nelt / 2))
35894 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
35895 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
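/* Editor's note: the vperm2i128 immediate picks one of the four 128-bit
   chunks of op1:op0 per result lane -- bits 0-1 select the low lane,
   bits 4-5 the high lane.  E.g. 0x20 yields { op0.lo, op1.lo } and
   0x31 yields { op0.hi, op1.hi }.  */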
35903 if (GET_MODE_SIZE (d->vmode) == 16)
35908 else if (GET_MODE_SIZE (d->vmode) == 32)
35913 /* V4DImode should be already handled through
35914 expand_vselect by vpermq instruction. */
35915 gcc_assert (d->vmode != V4DImode);
35918 if (d->vmode == V8SImode
35919 || d->vmode == V16HImode
35920 || d->vmode == V32QImode)
35922 /* First see if vpermq can be used for
35923 V8SImode/V16HImode/V32QImode. */
35924 if (valid_perm_using_mode_p (V4DImode, d))
35926 for (i = 0; i < 4; i++)
35927 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
35930 return expand_vselect (gen_lowpart (V4DImode, d->target),
35931 gen_lowpart (V4DImode, d->op0),
35935 /* Next see if vpermd can be used. */
35936 if (valid_perm_using_mode_p (V8SImode, d))
35940 if (vmode == V32QImode)
35942 /* vpshufb only works intra lanes; it is not
35943 possible to shuffle bytes between the lanes. */
35944 for (i = 0; i < nelt; ++i)
35945 if ((d->perm[i] ^ i) & (nelt / 2))
35956 if (vmode == V8SImode)
35957 for (i = 0; i < 8; ++i)
35958 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
35961 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
35962 if (d->op0 != d->op1)
35963 mask = 2 * nelt - 1;
35964 else if (vmode == V16QImode)
35967 mask = nelt / 2 - 1;
35969 for (i = 0; i < nelt; ++i)
35971 unsigned j, e = d->perm[i] & mask;
35972 for (j = 0; j < eltsz; ++j)
35973 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
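/* Editor's example: with V8HImode (eltsz == 2) the element selector
   e == 3 expands to byte selectors { 6, 7 }, so the whole-vector
   permutation { 3 2 1 0 7 6 5 4 } becomes the 16-byte pshufb control
   { 6 7 4 5 2 3 0 1 14 15 12 13 10 11 8 9 }.  */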
35977 vperm = gen_rtx_CONST_VECTOR (vmode,
35978 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
35979 vperm = force_reg (vmode, vperm);
35981 target = gen_lowpart (vmode, d->target);
35982 op0 = gen_lowpart (vmode, d->op0);
35983 if (d->op0 == d->op1)
35985 if (vmode == V16QImode)
35986 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
35987 else if (vmode == V32QImode)
35988 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
35990 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
35994 op1 = gen_lowpart (vmode, d->op1);
35995 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
36001 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
36002 in a single instruction. */
36005 expand_vec_perm_1 (struct expand_vec_perm_d *d)
36007 unsigned i, nelt = d->nelt;
36008 unsigned char perm2[MAX_VECT_LEN];
36010 /* Check plain VEC_SELECT first, because AVX has instructions that could
36011 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
36012 input where SEL+CONCAT may not. */
36013 if (d->op0 == d->op1)
36015 int mask = nelt - 1;
36016 bool identity_perm = true;
36017 bool broadcast_perm = true;
36019 for (i = 0; i < nelt; i++)
36021 perm2[i] = d->perm[i] & mask;
36023 identity_perm = false;
36025 broadcast_perm = false;
36031 emit_move_insn (d->target, d->op0);
36034 else if (broadcast_perm && TARGET_AVX2)
36036 /* Use vpbroadcast{b,w,d}. */
36037 rtx op = d->op0, (*gen) (rtx, rtx) = NULL;
36041 op = gen_lowpart (V16QImode, op);
36042 gen = gen_avx2_pbroadcastv32qi;
36045 op = gen_lowpart (V8HImode, op);
36046 gen = gen_avx2_pbroadcastv16hi;
36049 op = gen_lowpart (V4SImode, op);
36050 gen = gen_avx2_pbroadcastv8si;
36053 gen = gen_avx2_pbroadcastv16qi;
36056 gen = gen_avx2_pbroadcastv8hi;
36058 /* For other modes prefer other shuffles this function creates. */
36064 emit_insn (gen (d->target, op));
36069 if (expand_vselect (d->target, d->op0, perm2, nelt))
36072 /* There are plenty of patterns in sse.md that are written for
36073 SEL+CONCAT and are not replicated for a single op. Perhaps
36074 that should be changed, to avoid the nastiness here. */
36076 /* Recognize interleave style patterns, which means incrementing
36077 every other permutation operand. */
36078 for (i = 0; i < nelt; i += 2)
36080 perm2[i] = d->perm[i] & mask;
36081 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
36083 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
36086 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
36089 for (i = 0; i < nelt; i += 4)
36091 perm2[i + 0] = d->perm[i + 0] & mask;
36092 perm2[i + 1] = d->perm[i + 1] & mask;
36093 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
36094 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
36097 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
36102 /* Finally, try the fully general two operand permute. */
36103 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
36106 /* Recognize interleave style patterns with reversed operands. */
36107 if (d->op0 != d->op1)
36109 for (i = 0; i < nelt; ++i)
36111 unsigned e = d->perm[i];
36119 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
36123 /* Try the SSE4.1 blend variable merge instructions. */
36124 if (expand_vec_perm_blend (d))
36127 /* Try one of the AVX vpermil variable permutations. */
36128 if (expand_vec_perm_vpermil (d))
36131 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
36132 vpshufb, vpermd or vpermq variable permutation. */
36133 if (expand_vec_perm_pshufb (d))
36139 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36140 in terms of a pair of pshuflw + pshufhw instructions. */
36143 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
36145 unsigned char perm2[MAX_VECT_LEN];
36149 if (d->vmode != V8HImode || d->op0 != d->op1)
36152 /* The two permutations only operate in 64-bit lanes. */
36153 for (i = 0; i < 4; ++i)
36154 if (d->perm[i] >= 4)
36156 for (i = 4; i < 8; ++i)
36157 if (d->perm[i] < 4)
36163 /* Emit the pshuflw. */
36164 memcpy (perm2, d->perm, 4);
36165 for (i = 4; i < 8; ++i)
36167 ok = expand_vselect (d->target, d->op0, perm2, 8);
36170 /* Emit the pshufhw. */
36171 memcpy (perm2 + 4, d->perm + 4, 4);
36172 for (i = 0; i < 4; ++i)
36174 ok = expand_vselect (d->target, d->target, perm2, 8);
36180 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36181 the permutation using the SSSE3 palignr instruction. This succeeds
36182 when all of the elements in PERM fit within one vector and we merely
36183 need to shift them down so that a single vector permutation has a
36184 chance to succeed. */
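/* Editor's example: for V8HImode with perm { 3 4 5 6 7 8 9 10 } we get
   min == 3 and max == 10, so a palignr by 3 elements (48 bits) leaves
   the residual permutation { 0 1 2 3 4 5 6 7 } -- the identity -- and
   the alignment alone produces the result.  */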
36187 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
36189 unsigned i, nelt = d->nelt;
36194 /* Even with AVX, palignr only operates on 128-bit vectors. */
36195 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
36198 min = nelt, max = 0;
36199 for (i = 0; i < nelt; ++i)
36201 unsigned e = d->perm[i];
36207 if (min == 0 || max - min >= nelt)
36210 /* Given that we have SSSE3, we know we'll be able to implement the
36211 single operand permutation after the palignr with pshufb. */
36215 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
36216 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
36217 gen_lowpart (TImode, d->op1),
36218 gen_lowpart (TImode, d->op0), shift));
36220 d->op0 = d->op1 = d->target;
36223 for (i = 0; i < nelt; ++i)
36225 unsigned e = d->perm[i] - min;
36231 /* Test for the degenerate case where the alignment by itself
36232 produces the desired permutation. */
36236 ok = expand_vec_perm_1 (d);
36242 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
36244 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36245 a two vector permutation into a single vector permutation by using
36246 an interleave operation to merge the vectors. */
36249 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
36251 struct expand_vec_perm_d dremap, dfinal;
36252 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
36253 unsigned HOST_WIDE_INT contents;
36254 unsigned char remap[2 * MAX_VECT_LEN];
36256 bool ok, same_halves = false;
36258 if (GET_MODE_SIZE (d->vmode) == 16)
36260 if (d->op0 == d->op1)
36263 else if (GET_MODE_SIZE (d->vmode) == 32)
36267 /* For 32-byte modes allow even d->op0 == d->op1.
36268 The lack of cross-lane shuffling in some instructions
36269 might prevent a single insn shuffle. */
36271 dfinal.testing_p = true;
36272 /* If expand_vec_perm_interleave3 can expand this into
36273 a 3-insn sequence, give up and let it be expanded as
36274 a 3-insn sequence. While that is one insn longer,
36275 it doesn't need a memory operand, and in the common
36276 case that the interleave-low and interleave-high
36277 permutations with the same operands are adjacent, the
36278 pair needs only 4 insns after CSE. */
36279 if (expand_vec_perm_interleave3 (&dfinal))
36285 /* Examine from whence the elements come. */
36287 for (i = 0; i < nelt; ++i)
36288 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
36290 memset (remap, 0xff, sizeof (remap));
36293 if (GET_MODE_SIZE (d->vmode) == 16)
36295 unsigned HOST_WIDE_INT h1, h2, h3, h4;
36297 /* Split the two input vectors into 4 halves. */
36298 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
36303 /* If all elements come from the low halves, use interleave low; similarly,
36304 use interleave high when all come from the high halves. If the elements are
36305 from mismatched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
36306 if ((contents & (h1 | h3)) == contents)
36309 for (i = 0; i < nelt2; ++i)
36312 remap[i + nelt] = i * 2 + 1;
36313 dremap.perm[i * 2] = i;
36314 dremap.perm[i * 2 + 1] = i + nelt;
36316 if (!TARGET_SSE2 && d->vmode == V4SImode)
36317 dremap.vmode = V4SFmode;
36319 else if ((contents & (h2 | h4)) == contents)
36322 for (i = 0; i < nelt2; ++i)
36324 remap[i + nelt2] = i * 2;
36325 remap[i + nelt + nelt2] = i * 2 + 1;
36326 dremap.perm[i * 2] = i + nelt2;
36327 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
36329 if (!TARGET_SSE2 && d->vmode == V4SImode)
36330 dremap.vmode = V4SFmode;
36332 else if ((contents & (h1 | h4)) == contents)
36335 for (i = 0; i < nelt2; ++i)
36338 remap[i + nelt + nelt2] = i + nelt2;
36339 dremap.perm[i] = i;
36340 dremap.perm[i + nelt2] = i + nelt + nelt2;
36345 dremap.vmode = V2DImode;
36347 dremap.perm[0] = 0;
36348 dremap.perm[1] = 3;
36351 else if ((contents & (h2 | h3)) == contents)
36354 for (i = 0; i < nelt2; ++i)
36356 remap[i + nelt2] = i;
36357 remap[i + nelt] = i + nelt2;
36358 dremap.perm[i] = i + nelt2;
36359 dremap.perm[i + nelt2] = i + nelt;
36364 dremap.vmode = V2DImode;
36366 dremap.perm[0] = 1;
36367 dremap.perm[1] = 2;
36375 unsigned int nelt4 = nelt / 4, nzcnt = 0;
36376 unsigned HOST_WIDE_INT q[8];
36377 unsigned int nonzero_halves[4];
36379 /* Split the two input vectors into 8 quarters. */
36380 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
36381 for (i = 1; i < 8; ++i)
36382 q[i] = q[0] << (nelt4 * i);
36383 for (i = 0; i < 4; ++i)
36384 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
36386 nonzero_halves[nzcnt] = i;
36392 gcc_assert (d->op0 == d->op1);
36393 nonzero_halves[1] = nonzero_halves[0];
36394 same_halves = true;
36396 else if (d->op0 == d->op1)
36398 gcc_assert (nonzero_halves[0] == 0);
36399 gcc_assert (nonzero_halves[1] == 1);
36404 if (d->perm[0] / nelt2 == nonzero_halves[1])
36406 /* Attempt to increase the likelihood that dfinal
36407 shuffle will be intra-lane. */
36408 char tmph = nonzero_halves[0];
36409 nonzero_halves[0] = nonzero_halves[1];
36410 nonzero_halves[1] = tmph;
36413 /* vperm2f128 or vperm2i128. */
36414 for (i = 0; i < nelt2; ++i)
36416 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
36417 remap[i + nonzero_halves[0] * nelt2] = i;
36418 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
36419 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
36422 if (d->vmode != V8SFmode
36423 && d->vmode != V4DFmode
36424 && d->vmode != V8SImode)
36426 dremap.vmode = V8SImode;
36428 for (i = 0; i < 4; ++i)
36430 dremap.perm[i] = i + nonzero_halves[0] * 4;
36431 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
36435 else if (d->op0 == d->op1)
36437 else if (TARGET_AVX2
36438 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
36441 for (i = 0; i < nelt4; ++i)
36444 remap[i + nelt] = i * 2 + 1;
36445 remap[i + nelt2] = i * 2 + nelt2;
36446 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
36447 dremap.perm[i * 2] = i;
36448 dremap.perm[i * 2 + 1] = i + nelt;
36449 dremap.perm[i * 2 + nelt2] = i + nelt2;
36450 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
36453 else if (TARGET_AVX2
36454 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
36457 for (i = 0; i < nelt4; ++i)
36459 remap[i + nelt4] = i * 2;
36460 remap[i + nelt + nelt4] = i * 2 + 1;
36461 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
36462 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
36463 dremap.perm[i * 2] = i + nelt4;
36464 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
36465 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
36466 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
36473 /* Use the remapping array set up above to move the elements from their
36474 swizzled locations into their final destinations. */
36476 for (i = 0; i < nelt; ++i)
36478 unsigned e = remap[d->perm[i]];
36479 gcc_assert (e < nelt);
36480 /* If same_halves is true, both halves of the remapped vector are the
36481 same. Avoid cross-lane accesses if possible. */
36482 if (same_halves && i >= nelt2)
36484 gcc_assert (e < nelt2);
36485 dfinal.perm[i] = e + nelt2;
36488 dfinal.perm[i] = e;
36490 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
36491 dfinal.op1 = dfinal.op0;
36492 dremap.target = dfinal.op0;
36494 /* Test if the final remap can be done with a single insn. For V4SFmode or
36495 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
36497 ok = expand_vec_perm_1 (&dfinal);
36498 seq = get_insns ();
36507 if (dremap.vmode != dfinal.vmode)
36509 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
36510 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
36511 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
36514 ok = expand_vec_perm_1 (&dremap);
36521 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36522 a single vector cross-lane permutation into vpermq followed
36523 by any of the single insn permutations. */
36526 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
36528 struct expand_vec_perm_d dremap, dfinal;
36529 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
36530 unsigned contents[2];
36534 && (d->vmode == V32QImode || d->vmode == V16HImode)
36535 && d->op0 == d->op1))
36540 for (i = 0; i < nelt2; ++i)
36542 contents[0] |= 1u << (d->perm[i] / nelt4);
36543 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
36546 for (i = 0; i < 2; ++i)
36548 unsigned int cnt = 0;
36549 for (j = 0; j < 4; ++j)
36550 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
36558 dremap.vmode = V4DImode;
36560 dremap.target = gen_reg_rtx (V4DImode);
36561 dremap.op0 = gen_lowpart (V4DImode, d->op0);
36562 dremap.op1 = dremap.op0;
36563 for (i = 0; i < 2; ++i)
36565 unsigned int cnt = 0;
36566 for (j = 0; j < 4; ++j)
36567 if ((contents[i] & (1u << j)) != 0)
36568 dremap.perm[2 * i + cnt++] = j;
36569 for (; cnt < 2; ++cnt)
36570 dremap.perm[2 * i + cnt] = 0;
36574 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
36575 dfinal.op1 = dfinal.op0;
36576 for (i = 0, j = 0; i < nelt; ++i)
36580 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
36581 if ((d->perm[i] / nelt4) == dremap.perm[j])
36583 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
36584 dfinal.perm[i] |= nelt4;
36586 gcc_unreachable ();
36589 ok = expand_vec_perm_1 (&dremap);
36592 ok = expand_vec_perm_1 (&dfinal);
36598 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36599 a two vector permutation using 2 intra-lane interleave insns
36600 and cross-lane shuffle for 32-byte vectors. */
36603 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
36606 rtx (*gen) (rtx, rtx, rtx);
36608 if (d->op0 == d->op1)
36610 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
36612 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
36618 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
36620 for (i = 0; i < nelt; i += 2)
36621 if (d->perm[i] != d->perm[0] + i / 2
36622 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
36632 gen = gen_vec_interleave_highv32qi;
36634 gen = gen_vec_interleave_lowv32qi;
36638 gen = gen_vec_interleave_highv16hi;
36640 gen = gen_vec_interleave_lowv16hi;
36644 gen = gen_vec_interleave_highv8si;
36646 gen = gen_vec_interleave_lowv8si;
36650 gen = gen_vec_interleave_highv4di;
36652 gen = gen_vec_interleave_lowv4di;
36656 gen = gen_vec_interleave_highv8sf;
36658 gen = gen_vec_interleave_lowv8sf;
36662 gen = gen_vec_interleave_highv4df;
36664 gen = gen_vec_interleave_lowv4df;
36667 gcc_unreachable ();
36670 emit_insn (gen (d->target, d->op0, d->op1));
36674 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
36675 permutation with two pshufb insns and an ior. We should have already
36676 failed all two instruction sequences. */
36679 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
36681 rtx rperm[2][16], vperm, l, h, op, m128;
36682 unsigned int i, nelt, eltsz;
36684 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
36686 gcc_assert (d->op0 != d->op1);
36689 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36691 /* Generate two permutation masks. If the required element is within
36692 the given vector it is shuffled into the proper lane. If the required
36693 element is in the other vector, force a zero into the lane by setting
36694 bit 7 in the permutation mask. */
36695 m128 = GEN_INT (-128);
36696 for (i = 0; i < nelt; ++i)
36698 unsigned j, e = d->perm[i];
36699 unsigned which = (e >= nelt);
36703 for (j = 0; j < eltsz; ++j)
36705 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
36706 rperm[1-which][i*eltsz + j] = m128;
36710 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
36711 vperm = force_reg (V16QImode, vperm);
36713 l = gen_reg_rtx (V16QImode);
36714 op = gen_lowpart (V16QImode, d->op0);
36715 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
36717 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
36718 vperm = force_reg (V16QImode, vperm);
36720 h = gen_reg_rtx (V16QImode);
36721 op = gen_lowpart (V16QImode, d->op1);
36722 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
36724 op = gen_lowpart (V16QImode, d->target);
36725 emit_insn (gen_iorv16qi3 (op, l, h));
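/* Editor's example: to interleave the low bytes of the two operands,
   the op0 mask would be { 0, -128, 1, -128, ... } and the op1 mask
   { -128, 0, -128, 1, ... }; each pshufb zeroes the lanes owned by the
   other operand, so the final por simply merges the two results.  */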
36730 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
36731 with two vpshufb insns, vpermq and vpor. We should have already failed
36732 all two or three instruction sequences. */
36735 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
36737 rtx rperm[2][32], vperm, l, h, hp, op, m128;
36738 unsigned int i, nelt, eltsz;
36741 || d->op0 != d->op1
36742 || (d->vmode != V32QImode && d->vmode != V16HImode))
36749 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36751 /* Generate two permutation masks. If the required element is within
36752 the same lane, it is shuffled in. If the required element is from the
36753 other lane, force a zero by setting bit 7 in the permutation mask.
36754 The other mask has a non-negative element wherever an element is
36755 requested from the other lane; that element is also moved to the
36756 other lane, so that the result of vpshufb has the two V2TImode halves swapped. */
36758 m128 = GEN_INT (-128);
36759 for (i = 0; i < nelt; ++i)
36761 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
36762 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
36764 for (j = 0; j < eltsz; ++j)
36766 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
36767 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
36771 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
36772 vperm = force_reg (V32QImode, vperm);
36774 h = gen_reg_rtx (V32QImode);
36775 op = gen_lowpart (V32QImode, d->op0);
36776 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
36778 /* Swap the 128-bit lanes of h into hp. */
36779 hp = gen_reg_rtx (V4DImode);
36780 op = gen_lowpart (V4DImode, h);
36781 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
36784 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
36785 vperm = force_reg (V32QImode, vperm);
36787 l = gen_reg_rtx (V32QImode);
36788 op = gen_lowpart (V32QImode, d->op0);
36789 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
36791 op = gen_lowpart (V32QImode, d->target);
36792 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
36797 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
36798 and extract-odd permutations of two V32QImode or V16HImode operands
36799 with two vpshufb insns, vpor and vpermq. We should have already
36800 failed all two or three instruction sequences. */
36803 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
36805 rtx rperm[2][32], vperm, l, h, ior, op, m128;
36806 unsigned int i, nelt, eltsz;
36809 || d->op0 == d->op1
36810 || (d->vmode != V32QImode && d->vmode != V16HImode))
36813 for (i = 0; i < d->nelt; ++i)
36814 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
36821 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36823 /* Generate two permutation masks. In the first permutation mask
36824 the first quarter will contain indexes for the first half
36825 of the op0, the second quarter will contain bit 7 set, third quarter
36826 will contain indexes for the second half of the op0 and the
36827 last quarter bit 7 set. In the second permutation mask
36828 the first quarter will contain bit 7 set, the second quarter
36829 indexes for the first half of the op1, the third quarter bit 7 set
36830 and last quarter indexes for the second half of the op1.
36831 I.e. the first mask e.g. for V32QImode extract even will be:
36832 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
36833 (all values masked with 0xf except for -128) and second mask
36834 for extract even will be
36835 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
36836 m128 = GEN_INT (-128);
36837 for (i = 0; i < nelt; ++i)
36839 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
36840 unsigned which = d->perm[i] >= nelt;
36841 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
36843 for (j = 0; j < eltsz; ++j)
36845 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
36846 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
36850 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
36851 vperm = force_reg (V32QImode, vperm);
36853 l = gen_reg_rtx (V32QImode);
36854 op = gen_lowpart (V32QImode, d->op0);
36855 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
36857 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
36858 vperm = force_reg (V32QImode, vperm);
36860 h = gen_reg_rtx (V32QImode);
36861 op = gen_lowpart (V32QImode, d->op1);
36862 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
36864 ior = gen_reg_rtx (V32QImode);
36865 emit_insn (gen_iorv32qi3 (ior, l, h));
36867 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
36868 op = gen_lowpart (V4DImode, d->target);
36869 ior = gen_lowpart (V4DImode, ior);
36870 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
36871 const1_rtx, GEN_INT (3)));
36876 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
36877 and extract-odd permutations. */
36880 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
36887 t1 = gen_reg_rtx (V4DFmode);
36888 t2 = gen_reg_rtx (V4DFmode);
36890 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
36891 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
36892 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
36894 /* Now an unpck[lh]pd will produce the result required. */
36896 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
36898 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
36904 int mask = odd ? 0xdd : 0x88;
36906 t1 = gen_reg_rtx (V8SFmode);
36907 t2 = gen_reg_rtx (V8SFmode);
36908 t3 = gen_reg_rtx (V8SFmode);
36910 /* Shuffle within the 128-bit lanes to produce:
36911 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
36912 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
36915 /* Shuffle the lanes around to produce:
36916 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
36917 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
36920 /* Shuffle within the 128-bit lanes to produce:
36921 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
36922 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
36924 /* Shuffle within the 128-bit lanes to produce:
36925 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
36926 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
36928 /* Shuffle the lanes around to produce:
36929 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
36930 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
36939 /* These are always directly implementable by expand_vec_perm_1. */
36940 gcc_unreachable ();
36944 return expand_vec_perm_pshufb2 (d);
36947 /* We need 2*log2(N)-1 operations to achieve odd/even
36948 with interleave. */
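/* Editor's trace for V8HI with op0 = { 0 .. 7 }, op1 = { 8 .. f }:
   the first interleave pair yields { 0 8 1 9 2 a 3 b } and
   { 4 c 5 d 6 e 7 f }; the second pair yields { 0 4 8 c 1 5 9 d } and
   { 2 6 a e 3 7 b f }; a final interleave low then gives the even
   elements { 0 2 4 6 8 a c e } -- five insns, i.e. 2*log2(8)-1.  */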
36949 t1 = gen_reg_rtx (V8HImode);
36950 t2 = gen_reg_rtx (V8HImode);
36951 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
36952 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
36953 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
36954 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
36956 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
36958 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
36965 return expand_vec_perm_pshufb2 (d);
36968 t1 = gen_reg_rtx (V16QImode);
36969 t2 = gen_reg_rtx (V16QImode);
36970 t3 = gen_reg_rtx (V16QImode);
36971 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
36972 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
36973 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
36974 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
36975 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
36976 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
36978 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
36980 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
36987 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
36992 struct expand_vec_perm_d d_copy = *d;
36993 d_copy.vmode = V4DFmode;
36994 d_copy.target = gen_lowpart (V4DFmode, d->target);
36995 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
36996 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
36997 return expand_vec_perm_even_odd_1 (&d_copy, odd);
37000 t1 = gen_reg_rtx (V4DImode);
37001 t2 = gen_reg_rtx (V4DImode);
37003 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
37004 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
37005 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
37007 /* Now a vpunpck[lh]qdq will produce the result required. */
37009 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
37011 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
37018 struct expand_vec_perm_d d_copy = *d;
37019 d_copy.vmode = V8SFmode;
37020 d_copy.target = gen_lowpart (V8SFmode, d->target);
37021 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
37022 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
37023 return expand_vec_perm_even_odd_1 (&d_copy, odd);
37026 t1 = gen_reg_rtx (V8SImode);
37027 t2 = gen_reg_rtx (V8SImode);
37029 /* Shuffle the lanes around into
37030 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
37031 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
37032 gen_lowpart (V4DImode, d->op0),
37033 gen_lowpart (V4DImode, d->op1),
37035 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
37036 gen_lowpart (V4DImode, d->op0),
37037 gen_lowpart (V4DImode, d->op1),
37040 /* Swap the 2nd and 3rd position in each lane into
37041 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
37042 emit_insn (gen_avx2_pshufdv3 (t1, t1,
37043 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
37044 emit_insn (gen_avx2_pshufdv3 (t2, t2,
37045 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
37047 /* Now a vpunpck[lh]qdq will produce
37048 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
37050 t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
37051 gen_lowpart (V4DImode, t1),
37052 gen_lowpart (V4DImode, t2));
37054 t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
37055 gen_lowpart (V4DImode, t1),
37056 gen_lowpart (V4DImode, t2));
37061 gcc_unreachable ();
37067 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
37068 extract-even and extract-odd permutations. */
37071 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
37073 unsigned i, odd, nelt = d->nelt;
37076 if (odd != 0 && odd != 1)
37079 for (i = 1; i < nelt; ++i)
37080 if (d->perm[i] != 2 * i + odd)
37083 return expand_vec_perm_even_odd_1 (d, odd);
37086 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
37087 permutations. We assume that expand_vec_perm_1 has already failed. */
37090 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
37092 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
37093 enum machine_mode vmode = d->vmode;
37094 unsigned char perm2[4];
37102 /* These are special-cased in sse.md so that we can optionally
37103 use the vbroadcast instruction. They expand to two insns
37104 if the input happens to be in a register. */
37105 gcc_unreachable ();
37111 /* These are always implementable using standard shuffle patterns. */
37112 gcc_unreachable ();
37116 /* These can be implemented via interleave. We save one insn by
37117 stopping once we have promoted to V4SImode and then use pshufd. */
37121 rtx (*gen) (rtx, rtx, rtx)
37122 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
37123 : gen_vec_interleave_lowv8hi;
37127 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
37128 : gen_vec_interleave_highv8hi;
37133 dest = gen_reg_rtx (vmode);
37134 emit_insn (gen (dest, op0, op0));
37135 vmode = get_mode_wider_vector (vmode);
37136 op0 = gen_lowpart (vmode, dest);
37138 while (vmode != V4SImode);
37140 memset (perm2, elt, 4);
37141 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
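/* Editor's trace for broadcasting byte 5 of a V16QI: elt == 5 < 8
   selects interleave low, duplicating each low-half byte into an
   HImode pair; then elt == 5 >= 4 selects interleave high, leaving
   four copies of the byte in V4SI element elt == 1, which the final
   pshufd replicates to all four dwords.  */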
37149 /* With AVX2, broadcasts of the first element should already have
37150 been handled by expand_vec_perm_1 via vpbroadcast* or vpermq. */
37151 gcc_assert (!TARGET_AVX2 || d->perm[0]);
37155 gcc_unreachable ();
37159 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
37160 broadcast permutations. */
37163 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
37165 unsigned i, elt, nelt = d->nelt;
37167 if (d->op0 != d->op1)
37171 for (i = 1; i < nelt; ++i)
37172 if (d->perm[i] != elt)
37175 return expand_vec_perm_broadcast_1 (d);
37178 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
37179 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
37180 all the shorter instruction sequences. */
37183 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
37185 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
37186 unsigned int i, nelt, eltsz;
37190 || d->op0 == d->op1
37191 || (d->vmode != V32QImode && d->vmode != V16HImode))
37198 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
37200 /* Generate 4 permutation masks. If the required element is within
37201 the same lane, it is shuffled in. If the required element is from the
37202 other lane, force a zero by setting bit 7 in the permutation mask.
37203 The corresponding other mask has a non-negative element wherever an
37204 element is requested from the other lane; that element is also moved
37205 to the other lane, so that the result of vpshufb has the two V2TImode halves swapped. */
37207 m128 = GEN_INT (-128);
37208 for (i = 0; i < 32; ++i)
37210 rperm[0][i] = m128;
37211 rperm[1][i] = m128;
37212 rperm[2][i] = m128;
37213 rperm[3][i] = m128;
37219 for (i = 0; i < nelt; ++i)
37221 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
37222 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
37223 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
37225 for (j = 0; j < eltsz; ++j)
37226 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
37227 used[which] = true;
37230 for (i = 0; i < 2; ++i)
37232 if (!used[2 * i + 1])
37237 vperm = gen_rtx_CONST_VECTOR (V32QImode,
37238 gen_rtvec_v (32, rperm[2 * i + 1]));
37239 vperm = force_reg (V32QImode, vperm);
37240 h[i] = gen_reg_rtx (V32QImode);
37241 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
37242 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
37245 /* Swap the 128-bit lanes of h[X]. */
37246 for (i = 0; i < 2; ++i)
37248 if (h[i] == NULL_RTX)
37250 op = gen_reg_rtx (V4DImode);
37251 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
37252 const2_rtx, GEN_INT (3), const0_rtx,
37254 h[i] = gen_lowpart (V32QImode, op);
37257 for (i = 0; i < 2; ++i)
37264 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
37265 vperm = force_reg (V32QImode, vperm);
37266 l[i] = gen_reg_rtx (V32QImode);
37267 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
37268 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
37271 for (i = 0; i < 2; ++i)
37275 op = gen_reg_rtx (V32QImode);
37276 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
37283 gcc_assert (l[0] && l[1]);
37284 op = gen_lowpart (V32QImode, d->target);
37285 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
37289 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
37290 With all of the interface bits taken care of, perform the expansion
37291 in D and return true on success. */
37294 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
37296 /* Try a single instruction expansion. */
37297 if (expand_vec_perm_1 (d))
37300 /* Try sequences of two instructions. */
37302 if (expand_vec_perm_pshuflw_pshufhw (d))
37305 if (expand_vec_perm_palignr (d))
37308 if (expand_vec_perm_interleave2 (d))
37311 if (expand_vec_perm_broadcast (d))
37314 if (expand_vec_perm_vpermq_perm_1 (d))
37317 /* Try sequences of three instructions. */
37319 if (expand_vec_perm_pshufb2 (d))
37322 if (expand_vec_perm_interleave3 (d))
37325 /* Try sequences of four instructions. */
37327 if (expand_vec_perm_vpshufb2_vpermq (d))
37330 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
37333 /* ??? Look for narrow permutations whose element orderings would
37334 allow the promotion to a wider mode. */
37336 /* ??? Look for sequences of interleave or a wider permute that place
37337 the data into the correct lanes for a half-vector shuffle like
37338 pshuf[lh]w or vpermilps. */
37340 /* ??? Look for sequences of interleave that produce the desired results.
37341 The combinatorics of punpck[lh] get pretty ugly... */
37343 if (expand_vec_perm_even_odd (d))
37346 /* Even longer sequences. */
37347 if (expand_vec_perm_vpshufb4_vpermq2 (d))
37354 ix86_expand_vec_perm_const (rtx operands[4])
37356 struct expand_vec_perm_d d;
37357 unsigned char perm[MAX_VECT_LEN];
37358 int i, nelt, which;
37361 d.target = operands[0];
37362 d.op0 = operands[1];
37363 d.op1 = operands[2];
37366 d.vmode = GET_MODE (d.target);
37367 gcc_assert (VECTOR_MODE_P (d.vmode));
37368 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37369 d.testing_p = false;
37371 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
37372 gcc_assert (XVECLEN (sel, 0) == nelt);
37373 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
37375 for (i = which = 0; i < nelt; ++i)
37377 rtx e = XVECEXP (sel, 0, i);
37378 int ei = INTVAL (e) & (2 * nelt - 1);
37380 which |= (ei < nelt ? 1 : 2);
37391 if (!rtx_equal_p (d.op0, d.op1))
37394 /* The elements of PERM do not suggest that only the first operand
37395 is used, but both operands are identical. Allow easier matching
37396 of the permutation by folding the permutation into the single input vector.  */
37398 for (i = 0; i < nelt; ++i)
37399 if (d.perm[i] >= nelt)
37408 for (i = 0; i < nelt; ++i)
37414 if (ix86_expand_vec_perm_const_1 (&d))
37417 /* If the mask says both arguments are needed, but they are the same,
37418 the above tried to expand with d.op0 == d.op1. If that didn't work,
37419 retry with d.op0 != d.op1 as that is what testing has been done with. */
37420 if (which == 3 && d.op0 == d.op1)
37425 memcpy (d.perm, perm, sizeof (perm));
37426 d.op1 = gen_reg_rtx (d.vmode);
37428 ok = ix86_expand_vec_perm_const_1 (&d);
37429 seq = get_insns ();
37433 emit_move_insn (d.op1, d.op0);
37442 /* Implement targetm.vectorize.vec_perm_const_ok. */
37445 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
37446 const unsigned char *sel)
37448 struct expand_vec_perm_d d;
37449 unsigned int i, nelt, which;
37453 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37454 d.testing_p = true;
37456 /* Given sufficient ISA support we can just return true here
37457 for selected vector modes. */
37458 if (GET_MODE_SIZE (d.vmode) == 16)
37460 /* All implementable with a single vpperm insn. */
37463 /* All implementable with 2 pshufb + 1 ior. */
37466 /* All implementable with shufpd or unpck[lh]pd. */
37471 /* Extract the values from the vector CST into the permutation array in D.  */
37473 memcpy (d.perm, sel, nelt);
37474 for (i = which = 0; i < nelt; ++i)
37476 unsigned char e = d.perm[i];
37477 gcc_assert (e < 2 * nelt);
37478 which |= (e < nelt ? 1 : 2);
37481 /* For all elements from the second vector, fold them into the first. */
37483 for (i = 0; i < nelt; ++i)
37486 /* Check whether the mask can be applied to the vector type. */
37487 one_vec = (which != 3);
37489 /* Implementable with shufps or pshufd. */
37490 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
37493 /* Otherwise we have to go through the motions and see if we can
37494 figure out how to generate the requested permutation. */
37495 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
37496 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
37498 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
37501 ret = ix86_expand_vec_perm_const_1 (&d);
37508 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
37510 struct expand_vec_perm_d d;
37516 d.vmode = GET_MODE (targ);
37517 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37518 d.testing_p = false;
37520 for (i = 0; i < nelt; ++i)
37521 d.perm[i] = i * 2 + odd;
37523 /* We'll either be able to implement the permutation directly... */
37524 if (expand_vec_perm_1 (&d))
37527 /* ... or we use the special-case patterns. */
37528 expand_vec_perm_even_odd_1 (&d, odd);
37531 /* Expand an insert into a vector register through pinsr insn.
37532 Return true if successful. */
37535 ix86_expand_pinsr (rtx *operands)
37537 rtx dst = operands[0];
37538 rtx src = operands[3];
37540 unsigned int size = INTVAL (operands[1]);
37541 unsigned int pos = INTVAL (operands[2]);
37543 if (GET_CODE (dst) == SUBREG)
37545 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
37546 dst = SUBREG_REG (dst);
37549 if (GET_CODE (src) == SUBREG)
37550 src = SUBREG_REG (src);
37552 switch (GET_MODE (dst))
37559 enum machine_mode srcmode, dstmode;
37560 rtx (*pinsr)(rtx, rtx, rtx, rtx);
37562 srcmode = mode_for_size (size, MODE_INT, 0);
37567 if (!TARGET_SSE4_1)
37569 dstmode = V16QImode;
37570 pinsr = gen_sse4_1_pinsrb;
37576 dstmode = V8HImode;
37577 pinsr = gen_sse2_pinsrw;
37581 if (!TARGET_SSE4_1)
37583 dstmode = V4SImode;
37584 pinsr = gen_sse4_1_pinsrd;
37588 gcc_assert (TARGET_64BIT);
37589 if (!TARGET_SSE4_1)
37591 dstmode = V2DImode;
37592 pinsr = gen_sse4_1_pinsrq;
37599 dst = gen_lowpart (dstmode, dst);
37600 src = gen_lowpart (srcmode, src);
37604 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
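/* Editor's example: inserting a 16-bit value at bit offset 32 uses
   halfword lane 32/16 == 2, so the pinsrw control immediate here is
   1 << 2 == 4.  */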
37613 /* This function returns the calling-ABI-specific va_list type node
37614 for FNDECL. */
37617 ix86_fn_abi_va_list (tree fndecl)
37620 return va_list_type_node;
37621 gcc_assert (fndecl != NULL_TREE);
37623 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
37624 return ms_va_list_type_node;
37626 return sysv_va_list_type_node;
37629 /* Returns the canonical va_list type specified by TYPE. If there
37630 is no valid TYPE provided, it returns NULL_TREE. */
37633 ix86_canonical_va_list_type (tree type)
37637 /* Resolve references and pointers to va_list type. */
37638 if (TREE_CODE (type) == MEM_REF)
37639 type = TREE_TYPE (type);
37640 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
37641 type = TREE_TYPE (type);
37642 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
37643 type = TREE_TYPE (type);
37645 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
37647 wtype = va_list_type_node;
37648 gcc_assert (wtype != NULL_TREE);
37650 if (TREE_CODE (wtype) == ARRAY_TYPE)
37652 /* If va_list is an array type, the argument may have decayed
37653 to a pointer type, e.g. by being passed to another function.
37654 In that case, unwrap both types so that we can compare the
37655 underlying records. */
37656 if (TREE_CODE (htype) == ARRAY_TYPE
37657 || POINTER_TYPE_P (htype))
37659 wtype = TREE_TYPE (wtype);
37660 htype = TREE_TYPE (htype);
37663 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37664 return va_list_type_node;
37665 wtype = sysv_va_list_type_node;
37666 gcc_assert (wtype != NULL_TREE);
37668 if (TREE_CODE (wtype) == ARRAY_TYPE)
37670 /* If va_list is an array type, the argument may have decayed
37671 to a pointer type, e.g. by being passed to another function.
37672 In that case, unwrap both types so that we can compare the
37673 underlying records. */
37674 if (TREE_CODE (htype) == ARRAY_TYPE
37675 || POINTER_TYPE_P (htype))
37677 wtype = TREE_TYPE (wtype);
37678 htype = TREE_TYPE (htype);
37681 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37682 return sysv_va_list_type_node;
37683 wtype = ms_va_list_type_node;
37684 gcc_assert (wtype != NULL_TREE);
37686 if (TREE_CODE (wtype) == ARRAY_TYPE)
37688 /* If va_list is an array type, the argument may have decayed
37689 to a pointer type, e.g. by being passed to another function.
37690 In that case, unwrap both types so that we can compare the
37691 underlying records. */
37692 if (TREE_CODE (htype) == ARRAY_TYPE
37693 || POINTER_TYPE_P (htype))
37695 wtype = TREE_TYPE (wtype);
37696 htype = TREE_TYPE (htype);
37699 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37700 return ms_va_list_type_node;
37703 return std_canonical_va_list_type (type);
37706 /* Iterate through the target-specific builtin types for va_list.
37707 IDX denotes the iterator, *PTREE is set to the result type of
37708 the va_list builtin, and *PNAME to its internal type.
37709 Returns zero if there is no element for this index, otherwise
37710 IDX should be increased upon the next call.
37711 Note, do not iterate a base builtin's name like __builtin_va_list.
37712 Used from c_common_nodes_and_builtins. */
37715 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
37725 *ptree = ms_va_list_type_node;
37726 *pname = "__builtin_ms_va_list";
37730 *ptree = sysv_va_list_type_node;
37731 *pname = "__builtin_sysv_va_list";
37739 #undef TARGET_SCHED_DISPATCH
37740 #define TARGET_SCHED_DISPATCH has_dispatch
37741 #undef TARGET_SCHED_DISPATCH_DO
37742 #define TARGET_SCHED_DISPATCH_DO do_dispatch
37743 #undef TARGET_SCHED_REASSOCIATION_WIDTH
37744 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
37746 /* The size of the dispatch window is the total number of bytes of
37747 object code allowed in a window. */
37748 #define DISPATCH_WINDOW_SIZE 16
37750 /* Number of dispatch windows considered for scheduling. */
37751 #define MAX_DISPATCH_WINDOWS 3
37753 /* Maximum number of instructions in a window. */
37756 /* Maximum number of immediate operands in a window. */
37759 /* Maximum number of immediate bits allowed in a window. */
37760 #define MAX_IMM_SIZE 128
37762 /* Maximum number of 32 bit immediates allowed in a window. */
37763 #define MAX_IMM_32 4
37765 /* Maximum number of 64 bit immediates allowed in a window. */
37766 #define MAX_IMM_64 2
37768 /* Maximum total of loads or prefetches allowed in a window. */
37771 /* Maximum total of stores allowed in a window. */
37772 #define MAX_STORE 1
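/* Editor's note: taken together these limits describe one dispatch
   window as consumed by has_dispatch below -- e.g. a window may hold
   at most four 32-bit or two 64-bit immediates and a single store;
   they appear to model the dispatch packing constraints of AMD
   Bulldozer-family cores.  */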
37778 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
37779 enum dispatch_group {
37794 /* Number of allowable groups in a dispatch window. It is an array
37795 indexed by dispatch_group enum. 100 is used as a big number,
37796 because the number of these kinds of operations does not have any
37797 effect in a dispatch window, but we need them for other reasons in the table. */
37799 static unsigned int num_allowable_groups[disp_last] = {
37800 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
37803 char group_name[disp_last + 1][16] = {
37804 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
37805 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
37806 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
37809 /* Instruction path. */
37812 path_single, /* Single micro op. */
37813 path_double, /* Double micro op. */
37814 path_multi, /* Instructions with more than 2 micro ops. */
37818 /* sched_insn_info defines a window to the instructions scheduled in
37819 the basic block. It contains a pointer to the insn_info table and
37820 the instruction scheduled.
37822 Windows are allocated for each basic block and are linked with each other. */
37824 typedef struct sched_insn_info_s {
37826 enum dispatch_group group;
37827 enum insn_path path;
37832 /* Linked list of dispatch windows. This is a two way list of
37833 dispatch windows of a basic block. It contains information about
37834 the number of uops in the window and the total number of
37835 instructions and of bytes in the object code for this dispatch window. */
37837 typedef struct dispatch_windows_s {
37838 int num_insn; /* Number of insn in the window. */
37839 int num_uops; /* Number of uops in the window. */
37840 int window_size; /* Number of bytes in the window. */
37841 int window_num; /* Window number, 0 or 1. */
37842 int num_imm; /* Number of immediates in an insn. */
37843 int num_imm_32; /* Number of 32 bit immediates in an insn. */
37844 int num_imm_64; /* Number of 64 bit immediates in an insn. */
37845 int imm_size; /* Total immediates in the window. */
37846 int num_loads; /* Total memory loads in the window. */
37847 int num_stores; /* Total memory stores in the window. */
37848 int violation; /* Violation exists in window. */
37849 sched_insn_info *window; /* Pointer to the window. */
37850 struct dispatch_windows_s *next;
37851 struct dispatch_windows_s *prev;
37852 } dispatch_windows;
37854 /* Immediate values used in an insn. */
37855 typedef struct imm_info_s
37862 static dispatch_windows *dispatch_window_list;
37863 static dispatch_windows *dispatch_window_list1;
37865 /* Get dispatch group of insn. */
37867 static enum dispatch_group
37868 get_mem_group (rtx insn)
37870 enum attr_memory memory;
37872 if (INSN_CODE (insn) < 0)
37873 return disp_no_group;
37874 memory = get_attr_memory (insn);
37875 if (memory == MEMORY_STORE)
37878 if (memory == MEMORY_LOAD)
37881 if (memory == MEMORY_BOTH)
37882 return disp_load_store;
37884 return disp_no_group;
37887 /* Return true if insn is a compare instruction. */
37892 enum attr_type type;
37894 type = get_attr_type (insn);
37895 return (type == TYPE_TEST
37896 || type == TYPE_ICMP
37897 || type == TYPE_FCMP
37898 || GET_CODE (PATTERN (insn)) == COMPARE);
37901 /* Return true if a dispatch violation was encountered. */
37904 dispatch_violation (void)
37906 if (dispatch_window_list->next)
37907 return dispatch_window_list->next->violation;
37908 return dispatch_window_list->violation;
37911 /* Return true if insn is a branch instruction. */
37914 is_branch (rtx insn)
37916 return (CALL_P (insn) || JUMP_P (insn));
37919 /* Return true if insn is a prefetch instruction. */
37922 is_prefetch (rtx insn)
37924 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
/* This function initializes a dispatch window and the list container
   holding a pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}
/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}
/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  The end of a
   basic block does not have to be a branch, but here we assume only
   branches end a window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}
/* This function is called when the end of a window processing is
   reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
                  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}
/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not full
   dispatch windows of size DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
        init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
        (imm_values->imm32)++;
      else
        (imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
        {
          (imm_values->imm)++;
          (imm_values->imm32)++;
        }
      break;

    default:
      break;
    }

  return 0;
}
/* Compute the number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
                (rtx_function) find_constant_1, (void *) imm_values);
}
/* Return the total size of immediate operands of an instruction along
   with the number of corresponding immediate operands.  It initializes
   its parameters to zero before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of
   64 bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
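/* Worked example (illustration only): for an insn carrying one 32-bit
   and one 64-bit immediate, get_num_immediates stores *imm = 2,
   *imm32 = 1 and *imm64 = 1, and returns 1*4 + 1*8 = 12 bytes.  */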
/* This function indicates whether an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                               &num_imm64_operand);
  return false;
}
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}
/* Return the dispatch group of an insn.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count the number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand,
                                     &num_imm32_operand, &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
          || num_imm_operand + window_list->num_imm > MAX_IMM
          || (num_imm32_operand > 0
              && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
                  || window_list->num_imm_64 * 2 + num_imm32_operand
                     > MAX_IMM_32))
          || (num_imm64_operand > 0
              && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
                  || window_list->num_imm_32 + num_imm64_operand * 2
                     > MAX_IMM_32))
          || (window_list->imm_size + imm_size == MAX_IMM_SIZE
              && num_imm64_operand > 0
              && ((window_list->num_imm_64 > 0
                   && window_list->num_insn >= 2)
                  || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
           || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
           || group == disp_prefetch)
          && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
          && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
/* This function returns true if INSN satisfies the dispatch rules on
   the last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in the Haifa scheduler to make sure they will
     be scheduled in the same dispatch window as the reference to
     them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get the last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
          || (min_insn_size (insn) + sum) >= 48)
        /* Window 1 is full.  Go for the next window.  */
        return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
         uops.  */
      if (path == path_double
          && (window_list->num_uops + 2) > MAX_INSN)
        return false;
      else if (path != path_single)
        return false;
    }
  return true;
}
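/* Worked example (illustration only): with 30 bytes in window 0 and
   2 bytes in window 1, sum == 32 and window 1 is treated as full;
   likewise a 6-byte insn on top of sum == 44 reaches 50 >= 48 and
   pushes scheduling on to the next window pair.  */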
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
           || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed the allowable limits, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If the current window is full, get a new window.
     Window number zero is full if MAX_INSN uops are scheduled in it.
     Window number one is full if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it already has MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          return;
        }
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do the end-basic-block process.  */
      process_end_window ();
      return;
    }
}
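/* Note (illustration only): "window_num = ~window_num & 1" above
   toggles between the two windows, since ~0 & 1 == 1 and
   ~1 & 1 == 0.  */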
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
           list->num_insn, list->num_uops, list->window_size);
  fprintf (file,
           "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file,
               "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *) list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}
/* Print a dispatch window to stdout.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
           group_name[group], path, byte_len);
  fprintf (file,
           "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
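/* Hypothetical usage sketch (compiled out; not part of GCC): a caller
   such as the Haifa scheduler would drive the dispatch scheduler by
   initializing once and then feeding each scheduled insn in order.
   The array and count here are made up for the illustration.  */
#if 0
static void
example_drive_dispatch (rtx *sched_order, int n_insns)
{
  int i;

  do_dispatch (NULL_RTX, DISPATCH_INIT);
  for (i = 0; i < n_insns; i++)
    do_dispatch (sched_order[i], ADD_TO_DISPATCH_WINDOW);
}
#endif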
/* Return TRUE if dispatch scheduling is supported for ACTION.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;
        break;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}
/* Implementation of the reassociation_width target hook, used by the
   reassoc phase to identify the parallelism level in a reassociated
   tree.  The statement's tree_code is passed in OPC.  The arguments'
   type is passed in MODE.

   Currently parallel reassociation is enabled for Atom processors
   only, and we set the reassociation width to 2 because Atom may
   issue up to 2 instructions per cycle.

   The return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
                          enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}
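/* Worked example (illustration only): with width 2, the reassoc pass
   may rewrite a + b + c + d from the serial form ((a + b) + c) + d
   into (a + b) + (c + d), exposing two independent additions that a
   2-issue core such as Atom can execute in the same cycle.  */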
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
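/* Worked example (illustration only): with AVX enabled and
   -mprefer-avx128 not given, SFmode maps to V8SFmode, i.e. eight
   32-bit floats filling one 256-bit ymm register; without AVX the
   result is V4SFmode, four floats in a 128-bit xmm register.  */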
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
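/* Note (illustration only): the return value is a bitmask of vector
   byte sizes, so 32 | 16 == 48 tells the vectorizer to try both
   32-byte and 16-byte vectors, while 0 means use only the default
   size.  */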
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"