/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};
typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B)  ((block_info) (B)->aux)
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
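
/* Illustrative note (an editorial sketch, not an original comment):
   a vzeroupper insn carries one of the values above as operand 0 of
   its UNSPEC_VOLATILE pattern, e.g.

     (unspec_volatile [(const_int -1)] UNSPECV_VZEROUPPER)

   for a call site whose callee returns a 256bit AVX register
   (callee_return_avx256 == -1); move_or_delete_vzeroupper_2 below
   recovers the value with INTVAL (XVECEXP (pat, 0, 0)).  */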

/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
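
/* Usage sketch (illustrative only): check_avx256_stores is a callback
   for note_stores, which invokes it for every destination stored to by
   an insn pattern, exactly as move_or_delete_vzeroupper_2 does below:

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);
     if (state == used)
       ... some 256bit AVX register was written ...  */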

/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}

/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	  break;
	case used:
	  state = used;
	  break;
	case unused:
	  break;
	}
    }

  if (seen_unknown)
    state = unknown;

  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}

/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);
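
  /* Worked example (illustrative): if pre_and_rev_post_order_compute
     fills rc_order = {4, 2, 3}, then bb_order[4] = 0, bb_order[2] = 1
     and bb_order[3] = 2, so basic block 4 gets the smallest fibheap
     key and is extracted first by the data-flow loop below.  */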

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      edge_iterator ei;

	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)				\
  ((mode) == QImode ? 0					\
   : (mode) == HImode ? 1				\
   : (mode) == SImode ? 2				\
   : (mode) == DImode ? 3				\
   : 4)
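
/* Usage sketch (illustrative; assumes the multiply and divide cost
   arrays of struct processor_costs initialized below are named
   mult_init and divide):

     cost->mult_init[MODE_INDEX (SImode)]   -- SImode multiply startup
     cost->divide[MODE_INDEX (DImode)]      -- DImode divide/mod

   Index 4 ("other") catches any remaining machine mode.  */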

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
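
/* Worked example (illustrative): with COSTS_N_INSNS (N) == (N) * 4 as
   assumed above, COSTS_N_BYTES puts instruction sizes on the same
   scale as cycle counts, taking the 2-byte add as the unit:
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), and a 3-byte insn
   scores COSTS_N_BYTES (3) == 6, i.e. 1.5 adds.  */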

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
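
/* Illustrative note: each stringop strategy below pairs the algorithm
   used when the block size is unknown at compile time with a
   {max_size, algorithm} table terminated by a -1 ("any size") entry,
   e.g.

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   means: unknown size -> libcall, known size <= 256 bytes -> rep
   movsl/stosl, anything larger -> libcall.  Each cost table carries
   one such strategy for 32bit and one for 64bit code;
   DUMMY_STRINGOP_ALGS fills the slot that is irrelevant for a given
   CPU.  */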

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),		/* cost of an add instruction */
  COSTS_N_BYTES (3),		/* cost of a lea instruction */
  COSTS_N_BYTES (2),		/* variable shift costs */
  COSTS_N_BYTES (3),		/* constant shift costs */
  {COSTS_N_BYTES (3),		/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),		/*				 HI */
   COSTS_N_BYTES (3),		/*				 SI */
   COSTS_N_BYTES (3),		/*				 DI */
   COSTS_N_BYTES (5)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),		/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),		/*			    HI */
   COSTS_N_BYTES (3),		/*			    SI */
   COSTS_N_BYTES (3),		/*			    DI */
   COSTS_N_BYTES (5)},		/*			 other */
  COSTS_N_BYTES (3),		/* cost of movsx */
  COSTS_N_BYTES (3),		/* cost of movzx */
  0,				/* "large" insn */
  2,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {2, 2, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 2},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {2, 2, 2},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  3,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {3, 3},			/* cost of storing MMX registers
				   in SImode and DImode */
  3,				/* cost of moving SSE register */
  {3, 3, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {3, 3, 3},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of l1 cache  */
  0,				/* size of l2 cache  */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_BYTES (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),		/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),		/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),		/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  1,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  1,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
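
/* Illustrative note (editorial, not an original comment): these tables
   are not read directly.  Elsewhere in this file a tuning pointer is
   aimed at the table matching the active -mtune setting, roughly:

     const struct processor_costs *ix86_cost;
     ...
     ix86_cost = optimize_size ? &ix86_size_cost : <table for -mtune CPU>;

   so the COSTS_N_BYTES-based entries above are only consulted when
   optimizing for size.  */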

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (6),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),		/*				 HI */
   COSTS_N_INSNS (6),		/*				 SI */
   COSTS_N_INSNS (6),		/*				 DI */
   COSTS_N_INSNS (6)},		/*			      other */
  COSTS_N_INSNS (1),		/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),		/*			    HI */
   COSTS_N_INSNS (23),		/*			    SI */
   COSTS_N_INSNS (23),		/*			    DI */
   COSTS_N_INSNS (23)},		/*			 other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  3,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of l1 cache  */
  0,				/* size of l2 cache  */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (23),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),		/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (12),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),		/*				 HI */
   COSTS_N_INSNS (12),		/*				 SI */
   COSTS_N_INSNS (12),		/*				 DI */
   COSTS_N_INSNS (12)},		/*			      other */
  1,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),		/*			    HI */
   COSTS_N_INSNS (40),		/*			    SI */
   COSTS_N_INSNS (40),		/*			    DI */
   COSTS_N_INSNS (40)},		/*			 other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  3,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  4,				/* size of l1 cache.  486 has 8kB cache
				   shared for code and data, so 4kB is
				   not really precise.  */
  4,				/* size of l2 cache  */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),		/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (11),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),		/*				 HI */
   COSTS_N_INSNS (11),		/*				 SI */
   COSTS_N_INSNS (11),		/*				 DI */
   COSTS_N_INSNS (11)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),		/*			    HI */
   COSTS_N_INSNS (25),		/*			    SI */
   COSTS_N_INSNS (25),		/*			    DI */
   COSTS_N_INSNS (25)},		/*			 other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  6,				/* MOVE_RATIO */
  6,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  8,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  8,				/* size of l2 cache  */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (4),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (4)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),		/*			    HI */
   COSTS_N_INSNS (17),		/*			    SI */
   COSTS_N_INSNS (17),		/*			    DI */
   COSTS_N_INSNS (17)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  6,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  256,				/* size of l2 cache  */
  32,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (2),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (7),		/*				 SI */
   COSTS_N_INSNS (7),		/*				 DI */
   COSTS_N_INSNS (7)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),		/*			    HI */
   COSTS_N_INSNS (39),		/*			    SI */
   COSTS_N_INSNS (39),		/*			    DI */
   COSTS_N_INSNS (39)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* MOVE_RATIO */
  1,				/* cost for loading QImode using movzbl */
  {1, 1, 1},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {1, 1, 1},			/* cost of storing integer registers */
  1,				/* cost of reg,reg fld/fst */
  {1, 1, 1},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 6, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */

  1,				/* cost of moving MMX register */
  {1, 1},			/* cost of loading MMX registers
				   in SImode and DImode */
  {1, 1},			/* cost of storing MMX registers
				   in SImode and DImode */
  1,				/* cost of moving SSE register */
  {1, 1, 1},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {1, 1, 1},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  1,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  128,				/* size of l2 cache.  */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (3),		/*				 DI */
   COSTS_N_INSNS (3)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),		/*			    HI */
   COSTS_N_INSNS (18),		/*			    SI */
   COSTS_N_INSNS (18),		/*			    DI */
   COSTS_N_INSNS (18)},		/*			 other */
  COSTS_N_INSNS (2),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* MOVE_RATIO */
  3,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  6,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  32,				/* size of l2 cache.  Some models
				   have integrated l2 cache, but
				   optimizing for k6 is not important
				   enough to worry about that.  */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (5),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),		/*				 HI */
   COSTS_N_INSNS (5),		/*				 SI */
   COSTS_N_INSNS (5),		/*				 DI */
   COSTS_N_INSNS (5)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*			    HI */
   COSTS_N_INSNS (42),		/*			    SI */
   COSTS_N_INSNS (74),		/*			    DI */
   COSTS_N_INSNS (74)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  5,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (5)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*			    HI */
   COSTS_N_INSNS (42),		/*			    SI */
   COSTS_N_INSNS (74),		/*			    DI */
   COSTS_N_INSNS (74)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 3, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  5,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  3,				/* vec_unalign_load_cost.  */
  3,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  2,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (5)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/*			    HI */
   COSTS_N_INSNS (51),		/*			    SI */
   COSTS_N_INSNS (83),		/*			    DI */
   COSTS_N_INSNS (83)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  64,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  2,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (4),		/*				 SI */
   COSTS_N_INSNS (6),		/*				 DI */
   COSTS_N_INSNS (6)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/*			    HI */
   COSTS_N_INSNS (51),		/*			    SI */
   COSTS_N_INSNS (83),		/*			    DI */
   COSTS_N_INSNS (83)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {5, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {5, 5, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 4},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 4},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  2,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,				/* size of l1 cache.  */
  2048,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),		/* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,				/* scalar_stmt_cost.  */
  4,				/* scalar load_cost.  */
  4,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  4,				/* vec_align_load_cost.  */
  4,				/* vec_unalign_load_cost.  */
  4,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (4),		/*				 SI */
   COSTS_N_INSNS (6),		/*				 DI */
   COSTS_N_INSNS (6)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/*			    HI */
   COSTS_N_INSNS (51),		/*			    SI */
   COSTS_N_INSNS (83),		/*			    DI */
   COSTS_N_INSNS (83)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {5, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {5, 5, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 4},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 4},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  2,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,				/* size of l1 cache.  */
  2048,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),		/* cost of FSQRT instruction.  */

  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,				/* scalar_stmt_cost.  */
  4,				/* scalar load_cost.  */
  4,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  4,				/* vec_align_load_cost.  */
  4,				/* vec_unalign_load_cost.  */
  4,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (5)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/*			    HI */
   COSTS_N_INSNS (51),		/*			    SI */
   COSTS_N_INSNS (83),		/*			    DI */
   COSTS_N_INSNS (83)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  32,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  2,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (3),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (4),		/* constant shift costs */
  {COSTS_N_INSNS (15),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),		/*				 HI */
   COSTS_N_INSNS (15),		/*				 SI */
   COSTS_N_INSNS (15),		/*				 DI */
   COSTS_N_INSNS (15)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),		/*			    HI */
   COSTS_N_INSNS (56),		/*			    SI */
   COSTS_N_INSNS (56),		/*			    DI */
   COSTS_N_INSNS (56)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  6,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  12,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  10,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (5),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),		/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (10),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),		/*				 HI */
   COSTS_N_INSNS (10),		/*				 SI */
   COSTS_N_INSNS (10),		/*				 DI */
   COSTS_N_INSNS (10)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),		/*			    HI */
   COSTS_N_INSNS (66),		/*			    SI */
   COSTS_N_INSNS (66),		/*			    DI */
   COSTS_N_INSNS (66)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  3,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  6,				/* cost of moving MMX register */
  {12, 12},			/* cost of loading MMX registers
				   in SImode and DImode */
  {12, 12},			/* cost of storing MMX registers
				   in SImode and DImode */
  6,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {12, 12, 12},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  8,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  1024,				/* size of l2 cache.  */
  128,				/* size of prefetch block */
  8,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),		/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
1655 struct processor_costs atom_cost = {
1656 COSTS_N_INSNS (1), /* cost of an add instruction */
1657 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1658 COSTS_N_INSNS (1), /* variable shift costs */
1659 COSTS_N_INSNS (1), /* constant shift costs */
1660 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1661 COSTS_N_INSNS (4), /* HI */
1662 COSTS_N_INSNS (3), /* SI */
1663 COSTS_N_INSNS (4), /* DI */
1664 COSTS_N_INSNS (2)}, /* other */
1665 0, /* cost of multiply per each bit set */
1666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1667 COSTS_N_INSNS (26), /* HI */
1668 COSTS_N_INSNS (42), /* SI */
1669 COSTS_N_INSNS (74), /* DI */
1670 COSTS_N_INSNS (74)}, /* other */
1671 COSTS_N_INSNS (1), /* cost of movsx */
1672 COSTS_N_INSNS (1), /* cost of movzx */
1673 8, /* "large" insn */
1674 17, /* MOVE_RATIO */
1675 2, /* cost for loading QImode using movzbl */
1676 {4, 4, 4}, /* cost of loading integer registers
1677 in QImode, HImode and SImode.
1678 Relative to reg-reg move (2). */
1679 {4, 4, 4}, /* cost of storing integer registers */
1680 4, /* cost of reg,reg fld/fst */
1681 {12, 12, 12}, /* cost of loading fp registers
1682 in SFmode, DFmode and XFmode */
1683 {6, 6, 8}, /* cost of storing fp registers
1684 in SFmode, DFmode and XFmode */
1685 2, /* cost of moving MMX register */
1686 {8, 8}, /* cost of loading MMX registers
1687 in SImode and DImode */
1688 {8, 8}, /* cost of storing MMX registers
1689 in SImode and DImode */
1690 2, /* cost of moving SSE register */
1691 {8, 8, 8}, /* cost of loading SSE registers
1692 in SImode, DImode and TImode */
1693 {8, 8, 8}, /* cost of storing SSE registers
1694 in SImode, DImode and TImode */
1695 5, /* MMX or SSE register to integer */
1696 32, /* size of l1 cache. */
1697 256, /* size of l2 cache. */
1698 64, /* size of prefetch block */
1699 6, /* number of parallel prefetches */
1700 3, /* Branch cost */
1701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1702 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1703 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1704 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1705 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1706 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1707 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1708 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1709 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1710 {{libcall, {{8, loop}, {15, unrolled_loop},
1711 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1712 {libcall, {{24, loop}, {32, unrolled_loop},
1713 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1714 1, /* scalar_stmt_cost. */
1715 1, /* scalar load_cost. */
1716 1, /* scalar_store_cost. */
1717 1, /* vec_stmt_cost. */
1718 1, /* vec_to_scalar_cost. */
1719 1, /* scalar_to_vec_cost. */
1720 1, /* vec_align_load_cost. */
1721 2, /* vec_unalign_load_cost. */
1722 1, /* vec_store_cost. */
1723 3, /* cond_taken_branch_cost. */
1724 1, /* cond_not_taken_branch_cost. */
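/* Editorial sketch: the two brace initializers near the end of each
   cost table describe the memcpy and memset (clear) strategies in
   GCC's stringop_algs format.  The first member picks the algorithm
   for blocks of unknown size; each following {max, alg} pair selects
   ALG for known sizes up to MAX bytes, with max == -1 terminating the
   list.  Reading atom_cost above: a known 10-byte memcpy uses a simple
   loop, and in the 64-bit variant anything larger than 8192 bytes
   falls back to a libcall.  */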
1727 /* Generic64 should produce code tuned for Nocona and K8. */
1729 struct processor_costs generic64_cost = {
1730 COSTS_N_INSNS (1), /* cost of an add instruction */
1731 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1732 this cost, however, our current implementation of synth_mult results in
1733 the use of unnecessary temporary registers, causing regressions on several
1734 SPECfp benchmarks. */
1735 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1736 COSTS_N_INSNS (1), /* variable shift costs */
1737 COSTS_N_INSNS (1), /* constant shift costs */
1738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1739 COSTS_N_INSNS (4), /* HI */
1740 COSTS_N_INSNS (3), /* SI */
1741 COSTS_N_INSNS (4), /* DI */
1742 COSTS_N_INSNS (2)}, /* other */
1743 0, /* cost of multiply per each bit set */
1744 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1745 COSTS_N_INSNS (26), /* HI */
1746 COSTS_N_INSNS (42), /* SI */
1747 COSTS_N_INSNS (74), /* DI */
1748 COSTS_N_INSNS (74)}, /* other */
1749 COSTS_N_INSNS (1), /* cost of movsx */
1750 COSTS_N_INSNS (1), /* cost of movzx */
1751 8, /* "large" insn */
1752 17, /* MOVE_RATIO */
1753 4, /* cost for loading QImode using movzbl */
1754 {4, 4, 4}, /* cost of loading integer registers
1755 in QImode, HImode and SImode.
1756 Relative to reg-reg move (2). */
1757 {4, 4, 4}, /* cost of storing integer registers */
1758 4, /* cost of reg,reg fld/fst */
1759 {12, 12, 12}, /* cost of loading fp registers
1760 in SFmode, DFmode and XFmode */
1761 {6, 6, 8}, /* cost of storing fp registers
1762 in SFmode, DFmode and XFmode */
1763 2, /* cost of moving MMX register */
1764 {8, 8}, /* cost of loading MMX registers
1765 in SImode and DImode */
1766 {8, 8}, /* cost of storing MMX registers
1767 in SImode and DImode */
1768 2, /* cost of moving SSE register */
1769 {8, 8, 8}, /* cost of loading SSE registers
1770 in SImode, DImode and TImode */
1771 {8, 8, 8}, /* cost of storing SSE registers
1772 in SImode, DImode and TImode */
1773 5, /* MMX or SSE register to integer */
1774 32, /* size of l1 cache. */
1775 512, /* size of l2 cache. */
1776 64, /* size of prefetch block */
1777 6, /* number of parallel prefetches */
1778 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1779 value is increased to the perhaps more appropriate value of 5. */
1780 3, /* Branch cost */
1781 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1782 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1783 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1784 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1785 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1786 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1787 {DUMMY_STRINGOP_ALGS,
1788 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1789 {DUMMY_STRINGOP_ALGS,
1790 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1791 1, /* scalar_stmt_cost. */
1792 1, /* scalar load_cost. */
1793 1, /* scalar_store_cost. */
1794 1, /* vec_stmt_cost. */
1795 1, /* vec_to_scalar_cost. */
1796 1, /* scalar_to_vec_cost. */
1797 1, /* vec_align_load_cost. */
1798 2, /* vec_unalign_load_cost. */
1799 1, /* vec_store_cost. */
1800 3, /* cond_taken_branch_cost. */
1801 1, /* cond_not_taken_branch_cost. */
1804 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1805 Core 2 and K8. */
1807 struct processor_costs generic32_cost = {
1808 COSTS_N_INSNS (1), /* cost of an add instruction */
1809 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1810 COSTS_N_INSNS (1), /* variable shift costs */
1811 COSTS_N_INSNS (1), /* constant shift costs */
1812 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1813 COSTS_N_INSNS (4), /* HI */
1814 COSTS_N_INSNS (3), /* SI */
1815 COSTS_N_INSNS (4), /* DI */
1816 COSTS_N_INSNS (2)}, /* other */
1817 0, /* cost of multiply per each bit set */
1818 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1819 COSTS_N_INSNS (26), /* HI */
1820 COSTS_N_INSNS (42), /* SI */
1821 COSTS_N_INSNS (74), /* DI */
1822 COSTS_N_INSNS (74)}, /* other */
1823 COSTS_N_INSNS (1), /* cost of movsx */
1824 COSTS_N_INSNS (1), /* cost of movzx */
1825 8, /* "large" insn */
1826 17, /* MOVE_RATIO */
1827 4, /* cost for loading QImode using movzbl */
1828 {4, 4, 4}, /* cost of loading integer registers
1829 in QImode, HImode and SImode.
1830 Relative to reg-reg move (2). */
1831 {4, 4, 4}, /* cost of storing integer registers */
1832 4, /* cost of reg,reg fld/fst */
1833 {12, 12, 12}, /* cost of loading fp registers
1834 in SFmode, DFmode and XFmode */
1835 {6, 6, 8}, /* cost of storing fp registers
1836 in SFmode, DFmode and XFmode */
1837 2, /* cost of moving MMX register */
1838 {8, 8}, /* cost of loading MMX registers
1839 in SImode and DImode */
1840 {8, 8}, /* cost of storing MMX registers
1841 in SImode and DImode */
1842 2, /* cost of moving SSE register */
1843 {8, 8, 8}, /* cost of loading SSE registers
1844 in SImode, DImode and TImode */
1845 {8, 8, 8}, /* cost of storing SSE registers
1846 in SImode, DImode and TImode */
1847 5, /* MMX or SSE register to integer */
1848 32, /* size of l1 cache. */
1849 256, /* size of l2 cache. */
1850 64, /* size of prefetch block */
1851 6, /* number of parallel prefetches */
1852 3, /* Branch cost */
1853 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1854 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1855 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1856 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1857 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1858 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1859 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1860 DUMMY_STRINGOP_ALGS},
1861 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1862 DUMMY_STRINGOP_ALGS},
1863 1, /* scalar_stmt_cost. */
1864 1, /* scalar load_cost. */
1865 1, /* scalar_store_cost. */
1866 1, /* vec_stmt_cost. */
1867 1, /* vec_to_scalar_cost. */
1868 1, /* scalar_to_vec_cost. */
1869 1, /* vec_align_load_cost. */
1870 2, /* vec_unalign_load_cost. */
1871 1, /* vec_store_cost. */
1872 3, /* cond_taken_branch_cost. */
1873 1, /* cond_not_taken_branch_cost. */
1876 const struct processor_costs *ix86_cost = &pentium_cost;
1878 /* Processor feature/optimization bitmasks. */
1879 #define m_386 (1<<PROCESSOR_I386)
1880 #define m_486 (1<<PROCESSOR_I486)
1881 #define m_PENT (1<<PROCESSOR_PENTIUM)
1882 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1883 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1884 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1885 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1886 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1887 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1888 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1889 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1890 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1891 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1892 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1893 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1894 #define m_ATOM (1<<PROCESSOR_ATOM)
1896 #define m_GEODE (1<<PROCESSOR_GEODE)
1897 #define m_K6 (1<<PROCESSOR_K6)
1898 #define m_K6_GEODE (m_K6 | m_GEODE)
1899 #define m_K8 (1<<PROCESSOR_K8)
1900 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1903 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1904 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1905 #define m_BDVER (m_BDVER1 | m_BDVER2)
1906 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1907 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1909 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1910 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1912 /* Generic instruction choice should be a common subset of the supported CPUs
1913 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1914 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
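/* Editorial sketch (not part of the original file): the tuning and
   arch tables below are consumed by testing the active processor's
   bit against each entry's mask, roughly as follows.  */
#if 0
static inline bool
example_tune_enabled (unsigned int feature_mask, enum processor_type cpu)
{
  /* Each processor owns one bit (see the m_* defines above); a
     feature applies when the entry's mask includes that bit.  */
  return (feature_mask & (1u << cpu)) != 0;
}
#endif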
1916 /* Feature tests against the various tunings. */
1917 unsigned char ix86_tune_features[X86_TUNE_LAST];
1919 /* Feature tests against the various tunings used to create ix86_tune_features
1920 based on the processor mask. */
1921 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1922 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1923 negatively, so enabling it for Generic64 seems like a good code size
1924 tradeoff. We can't enable it for 32bit generic because it does not
1925 work well with PPro based chips. */
1926 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1928 /* X86_TUNE_PUSH_MEMORY */
1929 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1931 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1934 /* X86_TUNE_UNROLL_STRLEN */
1935 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1937 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1938 on simulation results, but after P4 was released no performance benefit
1939 was observed from branch hints; they also increase code size.
1940 As a result, icc never generates branch hints. */
1943 /* X86_TUNE_DOUBLE_WITH_ADD */
1946 /* X86_TUNE_USE_SAHF */
1947 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1949 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1950 partial dependencies. */
1951 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1953 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1954 register stalls on the Generic32 compilation setting as well.  However,
1955 in the current implementation partial register stalls are not eliminated
1956 very well - they can be introduced via subregs synthesized by combine
1957 and can happen in caller/callee saving sequences.  Because this option
1958 pays back little on PPro based chips and conflicts with the partial reg
1959 dependencies used by Athlon/P4 based chips, it is better to leave it off
1960 for generic32 for now. */
1963 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1964 m_CORE2I7 | m_GENERIC,
1966 /* X86_TUNE_USE_HIMODE_FIOP */
1967 m_386 | m_486 | m_K6_GEODE,
1969 /* X86_TUNE_USE_SIMODE_FIOP */
1970 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1972 /* X86_TUNE_USE_MOV0 */
1975 /* X86_TUNE_USE_CLTD */
1976 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1978 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1981 /* X86_TUNE_SPLIT_LONG_MOVES */
1984 /* X86_TUNE_READ_MODIFY_WRITE */
1987 /* X86_TUNE_READ_MODIFY */
1990 /* X86_TUNE_PROMOTE_QIMODE */
1991 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1993 /* X86_TUNE_FAST_PREFIX */
1994 ~(m_386 | m_486 | m_PENT),
1996 /* X86_TUNE_SINGLE_STRINGOP */
1997 m_386 | m_P4_NOCONA,
1999 /* X86_TUNE_QIMODE_MATH */
2002 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2003 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2004 might be considered for Generic32 if our scheme for avoiding partial
2005 stalls were more effective. */
2008 /* X86_TUNE_PROMOTE_QI_REGS */
2011 /* X86_TUNE_PROMOTE_HI_REGS */
2014 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2015 over esp addition. */
2016 m_386 | m_486 | m_PENT | m_PPRO,
2018 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2019 over esp addition. */
2022 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2023 over esp subtraction. */
2024 m_386 | m_486 | m_PENT | m_K6_GEODE,
2026 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2027 over esp subtraction. */
2028 m_PENT | m_K6_GEODE,
2030 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2031 for DFmode copies */
2032 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2034 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2035 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2037 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2038 conflict here between PPro/Pentium4 based chips that treat 128bit
2039 SSE registers as single units and K8 based chips that divide SSE
2040 registers into two 64bit halves. This knob promotes all store destinations
2041 to be 128bit, allowing register renaming on 128bit SSE units, but usually
2042 results in one extra microop on 64bit SSE units. Experimental results
2043 show that disabling this option on P4 brings over 20% SPECfp regression,
2044 while enabling it on K8 brings roughly 2.4% regression that can be partly
2045 masked by careful scheduling of moves. */
2046 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2048 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2049 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2051 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2054 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2057 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2058 are resolved on SSE register parts instead of whole registers, so we may
2059 maintain just the lower part of scalar values in the proper format, leaving
2060 the upper part undefined. */
2063 /* X86_TUNE_SSE_TYPELESS_STORES */
2066 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2067 m_PPRO | m_P4_NOCONA,
2069 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2070 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2072 /* X86_TUNE_PROLOGUE_USING_MOVE */
2073 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2075 /* X86_TUNE_EPILOGUE_USING_MOVE */
2076 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2078 /* X86_TUNE_SHIFT1 */
2081 /* X86_TUNE_USE_FFREEP */
2084 /* X86_TUNE_INTER_UNIT_MOVES */
2085 ~(m_AMD_MULTIPLE | m_GENERIC),
2087 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2088 ~(m_AMDFAM10 | m_BDVER ),
2090 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2091 than 4 branch instructions in the 16 byte window. */
2092 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2094 /* X86_TUNE_SCHEDULE */
2095 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2097 /* X86_TUNE_USE_BT */
2098 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2100 /* X86_TUNE_USE_INCDEC */
2101 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2103 /* X86_TUNE_PAD_RETURNS */
2104 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2106 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2109 /* X86_TUNE_EXT_80387_CONSTANTS */
2110 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2112 /* X86_TUNE_SHORTEN_X87_SSE */
2115 /* X86_TUNE_AVOID_VECTOR_DECODE */
2116 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2118 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2119 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
2122 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: An imul of a 32-bit constant and memory is
2123 a vector path on AMD machines. */
2124 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2126 /* X86_TUNE_SLOW_IMUL_IMM8: An imul of an 8-bit constant is a vector path on AMD
2127 machines. */
2128 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2130 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2131 than via a MOV. */
2134 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2135 but one byte longer. */
2138 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
2139 operand that cannot be represented using a modRM byte. The XOR
2140 replacement is long decoded, so this split helps here as well. */
2143 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2144 from FP to FP. */
2145 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2147 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2148 from integer to FP. */
2151 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2152 with a subsequent conditional jump instruction into a single
2153 compare-and-branch uop. */
2156 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2157 will impact LEA instruction selection. */
2160 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2161 instructions. */
2164 /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2165 at -O3. For the moment, the prefetching seems badly tuned for Intel
2166 chips. */
2167 m_K6_GEODE | m_AMD_MULTIPLE,
2169 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2170 the auto-vectorizer. */
2174 /* Feature tests against the various architecture variations. */
2175 unsigned char ix86_arch_features[X86_ARCH_LAST];
2177 /* Feature tests against the various architecture variations, used to create
2178 ix86_arch_features based on the processor mask. */
2179 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2180 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2181 ~(m_386 | m_486 | m_PENT | m_K6),
2183 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2186 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2189 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2192 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2196 static const unsigned int x86_accumulate_outgoing_args
2197 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2199 static const unsigned int x86_arch_always_fancy_math_387
2200 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2202 static const unsigned int x86_avx256_split_unaligned_load
2203 = m_COREI7 | m_GENERIC;
2205 static const unsigned int x86_avx256_split_unaligned_store
2206 = m_COREI7 | m_BDVER | m_GENERIC;
2208 /* In case the average insn count for single function invocation is
2209 lower than this constant, emit fast (but longer) prologue and
2210 epilogue code. */
2211 #define FAST_PROLOGUE_INSN_COUNT 20
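/* Editorial note: the consumer of this constant (the
   save_regs_using_mov logic declared further down) presumably treats
   functions averaging fewer than 20 insns per invocation as short
   enough that prologue speed dominates, preferring the longer
   move-based register save sequence over pushes.  */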
2213 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2214 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2215 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2216 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2218 /* Array of the smallest class containing reg number REGNO, indexed by
2219 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2221 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2223 /* ax, dx, cx, bx */
2224 AREG, DREG, CREG, BREG,
2225 /* si, di, bp, sp */
2226 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2228 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2229 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2232 /* flags, fpsr, fpcr, frame */
2233 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2235 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2238 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2241 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2242 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2243 /* SSE REX registers */
2244 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2248 /* The "default" register map used in 32bit mode. */
2250 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2252 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2253 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2254 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2255 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2256 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2257 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2258 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2261 /* The "default" register map used in 64bit mode. */
2263 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2265 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2266 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2267 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2268 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2269 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2270 8,9,10,11,12,13,14,15, /* extended integer registers */
2271 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2274 /* Define the register numbers to be used in Dwarf debugging information.
2275 The SVR4 reference port C compiler uses the following register numbers
2276 in its Dwarf output code:
2277 0 for %eax (gcc regno = 0)
2278 1 for %ecx (gcc regno = 2)
2279 2 for %edx (gcc regno = 1)
2280 3 for %ebx (gcc regno = 3)
2281 4 for %esp (gcc regno = 7)
2282 5 for %ebp (gcc regno = 6)
2283 6 for %esi (gcc regno = 4)
2284 7 for %edi (gcc regno = 5)
2285 The following three DWARF register numbers are never generated by
2286 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2287 believes these numbers have these meanings.
2288 8 for %eip (no gcc equivalent)
2289 9 for %eflags (gcc regno = 17)
2290 10 for %trapno (no gcc equivalent)
2291 It is not at all clear how we should number the FP stack registers
2292 for the x86 architecture. If the version of SDB on x86/svr4 were
2293 a bit less brain dead with respect to floating-point then we would
2294 have a precedent to follow with respect to DWARF register numbers
2295 for x86 FP registers, but the SDB on x86/svr4 is so completely
2296 broken with respect to FP registers that it is hardly worth thinking
2297 of it as something to strive for compatibility with.
2298 The version of x86/svr4 SDB I have at the moment does (partially)
2299 seem to believe that DWARF register number 11 is associated with
2300 the x86 register %st(0), but that's about all. Higher DWARF
2301 register numbers don't seem to be associated with anything in
2302 particular, and even for DWARF regno 11, SDB only seems to under-
2303 stand that it should say that a variable lives in %st(0) (when
2304 asked via an `=' command) if we said it was in DWARF regno 11,
2305 but SDB still prints garbage when asked for the value of the
2306 variable in question (via a `/' command).
2307 (Also note that the labels SDB prints for various FP stack regs
2308 when doing an `x' command are all wrong.)
2309 Note that these problems generally don't affect the native SVR4
2310 C compiler because it doesn't allow the use of -O with -g and
2311 because when it is *not* optimizing, it allocates a memory
2312 location for each floating-point variable, and the memory
2313 location is what gets described in the DWARF AT_location
2314 attribute for the variable in question.
2315 Regardless of the severe mental illness of the x86/svr4 SDB, we
2316 do something sensible here and we use the following DWARF
2317 register numbers. Note that these are all stack-top-relative
2318 numbers:
2319 11 for %st(0) (gcc regno = 8)
2320 12 for %st(1) (gcc regno = 9)
2321 13 for %st(2) (gcc regno = 10)
2322 14 for %st(3) (gcc regno = 11)
2323 15 for %st(4) (gcc regno = 12)
2324 16 for %st(5) (gcc regno = 13)
2325 17 for %st(6) (gcc regno = 14)
2326 18 for %st(7) (gcc regno = 15)
2328 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2330 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2331 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2332 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2333 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2334 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2335 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2336 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2339 /* Define parameter passing and return registers. */
2341 static int const x86_64_int_parameter_registers[6] =
2343 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2346 static int const x86_64_ms_abi_int_parameter_registers[4] =
2348 CX_REG, DX_REG, R8_REG, R9_REG
2351 static int const x86_64_int_return_registers[4] =
2353 AX_REG, DX_REG, DI_REG, SI_REG
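/* Editorial sketch: under the SysV x86-64 convention encoded by these
   tables, integer arguments are assigned left to right from
   x86_64_int_parameter_registers, e.g. for

     long f (long a, long b, long c);

   A arrives in %rdi (DI_REG), B in %rsi (SI_REG) and C in %rdx
   (DX_REG); the MS ABI table would instead start at %rcx (CX_REG).  */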
2356 /* Define the structure for the machine field in struct function. */
2358 struct GTY(()) stack_local_entry {
2359 unsigned short mode;
2362 struct stack_local_entry *next;
2365 /* Structure describing stack frame layout.
2366 Stack grows downward:
2372 saved static chain if ix86_static_chain_on_stack
2374 saved frame pointer if frame_pointer_needed
2375 <- HARD_FRAME_POINTER
2381 <- sse_regs_save_offset
2384 [va_arg registers] |
2388 [padding2] | = to_allocate
2397 int outgoing_arguments_size;
2398 HOST_WIDE_INT frame;
2400 /* The offsets relative to ARG_POINTER. */
2401 HOST_WIDE_INT frame_pointer_offset;
2402 HOST_WIDE_INT hard_frame_pointer_offset;
2403 HOST_WIDE_INT stack_pointer_offset;
2404 HOST_WIDE_INT hfp_save_offset;
2405 HOST_WIDE_INT reg_save_offset;
2406 HOST_WIDE_INT sse_reg_save_offset;
2408 /* When save_regs_using_mov is set, emit prologue using
2409 move instead of push instructions. */
2410 bool save_regs_using_mov;
2413 /* Which cpu are we scheduling for. */
2414 enum attr_cpu ix86_schedule;
2416 /* Which cpu are we optimizing for. */
2417 enum processor_type ix86_tune;
2419 /* Which instruction set architecture to use. */
2420 enum processor_type ix86_arch;
2422 /* True if the SSE prefetch instruction is not a NOP. */
2423 int x86_prefetch_sse;
2425 /* -mstackrealign option */
2426 static const char ix86_force_align_arg_pointer_string[]
2427 = "force_align_arg_pointer";
2429 static rtx (*ix86_gen_leave) (void);
2430 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2431 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2432 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2433 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2434 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2435 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2436 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2437 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2438 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2440 /* Preferred alignment for stack boundary in bits. */
2441 unsigned int ix86_preferred_stack_boundary;
2443 /* Alignment for incoming stack boundary in bits specified at
2445 static unsigned int ix86_user_incoming_stack_boundary;
2447 /* Default alignment for incoming stack boundary in bits. */
2448 static unsigned int ix86_default_incoming_stack_boundary;
2450 /* Alignment for incoming stack boundary in bits. */
2451 unsigned int ix86_incoming_stack_boundary;
2453 /* Calling-ABI-specific va_list type nodes. */
2454 static GTY(()) tree sysv_va_list_type_node;
2455 static GTY(()) tree ms_va_list_type_node;
2457 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2458 char internal_label_prefix[16];
2459 int internal_label_prefix_len;
2461 /* Fence to use after loop using movnt. */
2464 /* Register class used for passing a given 64bit part of the argument.
2465 These represent classes as documented by the psABI, with the exception
2466 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2467 uses an SF or DFmode move instead of DImode to avoid reformatting penalties.
2469 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2470 whenever possible (the upper half does contain padding). */
2471 enum x86_64_reg_class
2474 X86_64_INTEGER_CLASS,
2475 X86_64_INTEGERSI_CLASS,
2482 X86_64_COMPLEX_X87_CLASS,
2486 #define MAX_CLASSES 4
2488 /* Table of constants used by fldpi, fldln2, etc.... */
2489 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2490 static bool ext_80387_constants_init = 0;
2493 static struct machine_function * ix86_init_machine_status (void);
2494 static rtx ix86_function_value (const_tree, const_tree, bool);
2495 static bool ix86_function_value_regno_p (const unsigned int);
2496 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2498 static rtx ix86_static_chain (const_tree, bool);
2499 static int ix86_function_regparm (const_tree, const_tree);
2500 static void ix86_compute_frame_layout (struct ix86_frame *);
2501 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2503 static void ix86_add_new_builtins (HOST_WIDE_INT);
2504 static rtx ix86_expand_vec_perm_builtin (tree);
2505 static tree ix86_canonical_va_list_type (tree);
2506 static void predict_jump (int);
2507 static unsigned int split_stack_prologue_scratch_regno (void);
2508 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2510 enum ix86_function_specific_strings
2512 IX86_FUNCTION_SPECIFIC_ARCH,
2513 IX86_FUNCTION_SPECIFIC_TUNE,
2514 IX86_FUNCTION_SPECIFIC_MAX
2517 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2518 const char *, enum fpmath_unit, bool);
2519 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2520 static void ix86_function_specific_save (struct cl_target_option *);
2521 static void ix86_function_specific_restore (struct cl_target_option *);
2522 static void ix86_function_specific_print (FILE *, int,
2523 struct cl_target_option *);
2524 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2525 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2526 struct gcc_options *);
2527 static bool ix86_can_inline_p (tree, tree);
2528 static void ix86_set_current_function (tree);
2529 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2531 static enum calling_abi ix86_function_abi (const_tree);
2534 #ifndef SUBTARGET32_DEFAULT_CPU
2535 #define SUBTARGET32_DEFAULT_CPU "i386"
2538 /* The svr4 ABI for the i386 says that records and unions are returned
2539 in memory. */
2540 #ifndef DEFAULT_PCC_STRUCT_RETURN
2541 #define DEFAULT_PCC_STRUCT_RETURN 1
2544 /* Whether -mtune= or -march= were specified */
2545 static int ix86_tune_defaulted;
2546 static int ix86_arch_specified;
2548 /* Vectorization library interface and handlers. */
2549 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2551 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2552 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2554 /* Processor target table, indexed by processor number */
2555 struct ptt
2556 {
2557 const struct processor_costs *cost; /* Processor costs */
2558 const int align_loop; /* Default alignments. */
2559 const int align_loop_max_skip;
2560 const int align_jump;
2561 const int align_jump_max_skip;
2562 const int align_func;
2565 static const struct ptt processor_target_table[PROCESSOR_max] =
2567 {&i386_cost, 4, 3, 4, 3, 4},
2568 {&i486_cost, 16, 15, 16, 15, 16},
2569 {&pentium_cost, 16, 7, 16, 7, 16},
2570 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2571 {&geode_cost, 0, 0, 0, 0, 0},
2572 {&k6_cost, 32, 7, 32, 7, 32},
2573 {&athlon_cost, 16, 7, 16, 7, 16},
2574 {&pentium4_cost, 0, 0, 0, 0, 0},
2575 {&k8_cost, 16, 7, 16, 7, 16},
2576 {&nocona_cost, 0, 0, 0, 0, 0},
2577 /* Core 2 32-bit. */
2578 {&generic32_cost, 16, 10, 16, 10, 16},
2579 /* Core 2 64-bit. */
2580 {&generic64_cost, 16, 10, 16, 10, 16},
2581 /* Core i7 32-bit. */
2582 {&generic32_cost, 16, 10, 16, 10, 16},
2583 /* Core i7 64-bit. */
2584 {&generic64_cost, 16, 10, 16, 10, 16},
2585 {&generic32_cost, 16, 7, 16, 7, 16},
2586 {&generic64_cost, 16, 10, 16, 10, 16},
2587 {&amdfam10_cost, 32, 24, 32, 7, 32},
2588 {&bdver1_cost, 32, 24, 32, 7, 32},
2589 {&bdver2_cost, 32, 24, 32, 7, 32},
2590 {&btver1_cost, 32, 24, 32, 7, 32},
2591 {&atom_cost, 16, 7, 16, 7, 16}
2594 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2624 /* Return true if a red-zone is in use. */
2627 ix86_using_red_zone (void)
2629 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
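/* Editorial note: the "red zone" is the 128-byte area below the stack
   pointer that the SysV x86-64 psABI guarantees signal and interrupt
   handlers will not clobber, letting leaf functions use it without
   adjusting %rsp.  The MS ABI provides no such guarantee, hence the
   !TARGET_64BIT_MS_ABI test above.  */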
2632 /* Return a string that documents the current -m options. The caller is
2633 responsible for freeing the string. */
2636 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2637 const char *tune, enum fpmath_unit fpmath,
2640 struct ix86_target_opts
2642 const char *option; /* option string */
2643 HOST_WIDE_INT mask; /* isa mask options */
2646 /* This table is ordered so that options like -msse4.2 that imply
2647 preceding options will match those first. */
2648 static struct ix86_target_opts isa_opts[] =
2650 { "-m64", OPTION_MASK_ISA_64BIT },
2651 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2652 { "-mfma", OPTION_MASK_ISA_FMA },
2653 { "-mxop", OPTION_MASK_ISA_XOP },
2654 { "-mlwp", OPTION_MASK_ISA_LWP },
2655 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2656 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2657 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2658 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2659 { "-msse3", OPTION_MASK_ISA_SSE3 },
2660 { "-msse2", OPTION_MASK_ISA_SSE2 },
2661 { "-msse", OPTION_MASK_ISA_SSE },
2662 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2663 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2664 { "-mmmx", OPTION_MASK_ISA_MMX },
2665 { "-mabm", OPTION_MASK_ISA_ABM },
2666 { "-mbmi", OPTION_MASK_ISA_BMI },
2667 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2668 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2669 { "-mtbm", OPTION_MASK_ISA_TBM },
2670 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2671 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2672 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2673 { "-maes", OPTION_MASK_ISA_AES },
2674 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2675 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2676 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2677 { "-mf16c", OPTION_MASK_ISA_F16C },
2681 static struct ix86_target_opts flag_opts[] =
2683 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2684 { "-m80387", MASK_80387 },
2685 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2686 { "-malign-double", MASK_ALIGN_DOUBLE },
2687 { "-mcld", MASK_CLD },
2688 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2689 { "-mieee-fp", MASK_IEEE_FP },
2690 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2691 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2692 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2693 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2694 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2695 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2696 { "-mno-red-zone", MASK_NO_RED_ZONE },
2697 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2698 { "-mrecip", MASK_RECIP },
2699 { "-mrtd", MASK_RTD },
2700 { "-msseregparm", MASK_SSEREGPARM },
2701 { "-mstack-arg-probe", MASK_STACK_PROBE },
2702 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2703 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2704 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2705 { "-mvzeroupper", MASK_VZEROUPPER },
2706 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2707 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2708 { "-mprefer-avx128", MASK_PREFER_AVX128},
2711 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2714 char target_other[40];
2723 memset (opts, '\0', sizeof (opts));
2725 /* Add -march= option. */
2728 opts[num][0] = "-march=";
2729 opts[num++][1] = arch;
2732 /* Add -mtune= option. */
2735 opts[num][0] = "-mtune=";
2736 opts[num++][1] = tune;
2739 /* Pick out the enabled ISA options. */
2740 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2742 if ((isa & isa_opts[i].mask) != 0)
2744 opts[num++][0] = isa_opts[i].option;
2745 isa &= ~ isa_opts[i].mask;
2749 if (isa && add_nl_p)
2751 opts[num++][0] = isa_other;
2752 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2756 /* Add flag options. */
2757 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2759 if ((flags & flag_opts[i].mask) != 0)
2761 opts[num++][0] = flag_opts[i].option;
2762 flags &= ~ flag_opts[i].mask;
2766 if (flags && add_nl_p)
2768 opts[num++][0] = target_other;
2769 sprintf (target_other, "(other flags: %#x)", flags);
2772 /* Add the -mfpmath= option. */
2775 opts[num][0] = "-mfpmath=";
2776 switch ((int) fpmath)
2779 opts[num++][1] = "387";
2783 opts[num++][1] = "sse";
2786 case FPMATH_387 | FPMATH_SSE:
2787 opts[num++][1] = "sse+387";
2799 gcc_assert (num < ARRAY_SIZE (opts));
2801 /* Size the string. */
2803 sep_len = (add_nl_p) ? 3 : 1;
2804 for (i = 0; i < num; i++)
2807 for (j = 0; j < 2; j++)
2809 len += strlen (opts[i][j]);
2812 /* Build the string. */
2813 ret = ptr = (char *) xmalloc (len);
2816 for (i = 0; i < num; i++)
2820 for (j = 0; j < 2; j++)
2821 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2828 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2836 for (j = 0; j < 2; j++)
2839 memcpy (ptr, opts[i][j], len2[j]);
2841 line_len += len2[j];
2846 gcc_assert (ret + len >= ptr);
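/* Editorial example (hypothetical invocation): for a 64-bit
   compilation with only SSE2 and its prerequisites enabled, the
   string assembled above would read roughly

     -march=x86-64 -mtune=generic -m64 -msse2 -msse -mmmx -mfpmath=sse

   with -march=/-mtune= first, then one entry per set ISA bit in table
   order, then any flag options, then -mfpmath=.  */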
2851 /* Return true if profiling code should be emitted before the
2852 prologue; otherwise return false.
2853 Note: for x86 with "hotfix" it is sorried, i.e. rejected with sorry (). */
2855 ix86_profile_before_prologue (void)
2857 return flag_fentry != 0;
2860 /* Function that is callable from the debugger to print the current
2861 options. */
2863 ix86_debug_options (void)
2865 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2866 ix86_arch_string, ix86_tune_string,
2871 fprintf (stderr, "%s\n\n", opts);
2875 fputs ("<no options>\n\n", stderr);
2880 /* Override various settings based on options. If MAIN_ARGS_P, the
2881 options are from the command line, otherwise they are from
2882 attributes. */
2885 ix86_option_override_internal (bool main_args_p)
2888 unsigned int ix86_arch_mask, ix86_tune_mask;
2889 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2894 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2895 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2896 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2897 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2898 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2899 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2900 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2901 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2902 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2903 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2904 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2905 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2906 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2907 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2908 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2909 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2910 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2911 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2912 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2913 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2914 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2915 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2916 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2917 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2918 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2919 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2920 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2921 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2922 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2923 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2924 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2925 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2926 /* If this reaches 64, we need to widen the struct pta flags field below. */
2930 const char *const name; /* processor name or nickname. */
2931 const enum processor_type processor;
2932 const enum attr_cpu schedule;
2933 const unsigned HOST_WIDE_INT flags;
2935 const processor_alias_table[] =
2937 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2938 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2939 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2940 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2941 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2942 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2943 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2944 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2945 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2946 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2947 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2948 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2949 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2951 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2953 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2954 PTA_MMX | PTA_SSE | PTA_SSE2},
2955 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2956 PTA_MMX |PTA_SSE | PTA_SSE2},
2957 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2958 PTA_MMX | PTA_SSE | PTA_SSE2},
2959 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2960 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2961 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2962 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2963 | PTA_CX16 | PTA_NO_SAHF},
2964 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2965 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2966 | PTA_SSSE3 | PTA_CX16},
2967 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2968 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2969 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2970 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2971 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2972 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2973 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2974 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
2975 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2976 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2977 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2978 | PTA_RDRND | PTA_F16C},
2979 {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
2980 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2981 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
2982 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2983 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
2984 | PTA_FMA | PTA_MOVBE},
2985 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2986 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2987 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2988 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2989 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2990 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2991 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2992 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2993 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2994 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2995 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2996 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2997 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2998 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2999 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3000 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3001 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3002 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3003 {"x86-64", PROCESSOR_K8, CPU_K8,
3004 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3005 {"k8", PROCESSOR_K8, CPU_K8,
3006 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3007 | PTA_SSE2 | PTA_NO_SAHF},
3008 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3009 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3010 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3011 {"opteron", PROCESSOR_K8, CPU_K8,
3012 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3013 | PTA_SSE2 | PTA_NO_SAHF},
3014 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3015 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3016 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3017 {"athlon64", PROCESSOR_K8, CPU_K8,
3018 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3019 | PTA_SSE2 | PTA_NO_SAHF},
3020 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3021 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3022 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3023 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3024 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3025 | PTA_SSE2 | PTA_NO_SAHF},
3026 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3027 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3028 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3029 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3030 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3031 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3032 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3033 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3034 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3035 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3036 | PTA_XOP | PTA_LWP},
3037 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3038 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3039 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3040 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3041 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3043 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3044 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3045 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
3046 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3047 0 /* flags are only used for -march switch. */ },
3048 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3049 PTA_64BIT /* flags are only used for -march switch. */ },
3052 int const pta_size = ARRAY_SIZE (processor_alias_table);
3054 /* Set up prefix/suffix so the error messages refer to either the command
3055 line argument, or the attribute(target). */
3064 prefix = "option(\"";
3069 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3070 SUBTARGET_OVERRIDE_OPTIONS;
3073 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3074 SUBSUBTARGET_OVERRIDE_OPTIONS;
3078 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3080 /* -fPIC is the default for x86_64. */
3081 if (TARGET_MACHO && TARGET_64BIT)
3084 /* Need to check -mtune=generic first. */
3085 if (ix86_tune_string)
3087 if (!strcmp (ix86_tune_string, "generic")
3088 || !strcmp (ix86_tune_string, "i686")
3089 /* As special support for cross compilers we read -mtune=native
3090 as -mtune=generic. With native compilers we won't see the
3091 -mtune=native, as it was changed by the driver. */
3092 || !strcmp (ix86_tune_string, "native"))
3095 ix86_tune_string = "generic64";
3097 ix86_tune_string = "generic32";
3099 /* If this call is for setting the option attribute, allow the
3100 generic32/generic64 that was previously set. */
3101 else if (!main_args_p
3102 && (!strcmp (ix86_tune_string, "generic32")
3103 || !strcmp (ix86_tune_string, "generic64")))
3105 else if (!strncmp (ix86_tune_string, "generic", 7))
3106 error ("bad value (%s) for %stune=%s %s",
3107 ix86_tune_string, prefix, suffix, sw);
3108 else if (!strcmp (ix86_tune_string, "x86-64"))
3109 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3110 "%stune=k8%s or %stune=generic%s instead as appropriate",
3111 prefix, suffix, prefix, suffix, prefix, suffix);
3115 if (ix86_arch_string)
3116 ix86_tune_string = ix86_arch_string;
3117 if (!ix86_tune_string)
3119 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3120 ix86_tune_defaulted = 1;
3123 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3124 need to use a sensible tune option. */
3125 if (!strcmp (ix86_tune_string, "generic")
3126 || !strcmp (ix86_tune_string, "x86-64")
3127 || !strcmp (ix86_tune_string, "i686"))
3130 ix86_tune_string = "generic64";
3132 ix86_tune_string = "generic32";
3136 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3138 /* rep; movq isn't available in 32-bit code. */
3139 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3140 ix86_stringop_alg = no_stringop;
3143 if (!ix86_arch_string)
3144 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3146 ix86_arch_specified = 1;
3148 if (!global_options_set.x_ix86_abi)
3149 ix86_abi = DEFAULT_ABI;
3151 if (global_options_set.x_ix86_cmodel)
3153 switch (ix86_cmodel)
3158 ix86_cmodel = CM_SMALL_PIC;
3160 error ("code model %qs not supported in the %s bit mode",
3167 ix86_cmodel = CM_MEDIUM_PIC;
3169 error ("code model %qs not supported in the %s bit mode",
3171 else if (TARGET_X32)
3172 error ("code model %qs not supported in x32 mode",
3179 ix86_cmodel = CM_LARGE_PIC;
3181 error ("code model %qs not supported in the %s bit mode",
3183 else if (TARGET_X32)
3184 error ("code model %qs not supported in x32 mode",
3190 error ("code model %s does not support PIC mode", "32");
3192 error ("code model %qs not supported in the %s bit mode",
3199 error ("code model %s does not support PIC mode", "kernel");
3200 ix86_cmodel = CM_32;
3203 error ("code model %qs not supported in the %s bit mode",
3213 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3214 use of rip-relative addressing. This eliminates fixups that
3215 would otherwise be needed if this object is to be placed in a
3216 DLL, and is essentially just as efficient as direct addressing. */
3217 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3218 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3219 else if (TARGET_64BIT)
3220 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3222 ix86_cmodel = CM_32;
3224 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3226 error ("-masm=intel not supported in this configuration");
3227 ix86_asm_dialect = ASM_ATT;
3229 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3230 sorry ("%i-bit mode not compiled in",
3231 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3233 for (i = 0; i < pta_size; i++)
3234 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3236 ix86_schedule = processor_alias_table[i].schedule;
3237 ix86_arch = processor_alias_table[i].processor;
3238 /* Default cpu tuning to the architecture. */
3239 ix86_tune = ix86_arch;
3241 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3242 error ("CPU you selected does not support x86-64 "
3245 if (processor_alias_table[i].flags & PTA_MMX
3246 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3247 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3248 if (processor_alias_table[i].flags & PTA_3DNOW
3249 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3250 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3251 if (processor_alias_table[i].flags & PTA_3DNOW_A
3252 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3253 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3254 if (processor_alias_table[i].flags & PTA_SSE
3255 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3256 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3257 if (processor_alias_table[i].flags & PTA_SSE2
3258 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3259 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3260 if (processor_alias_table[i].flags & PTA_SSE3
3261 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3262 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3263 if (processor_alias_table[i].flags & PTA_SSSE3
3264 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3265 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3266 if (processor_alias_table[i].flags & PTA_SSE4_1
3267 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3268 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3269 if (processor_alias_table[i].flags & PTA_SSE4_2
3270 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3271 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3272 if (processor_alias_table[i].flags & PTA_AVX
3273 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3274 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3275 if (processor_alias_table[i].flags & PTA_AVX2
3276 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3277 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3278 if (processor_alias_table[i].flags & PTA_FMA
3279 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3280 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3281 if (processor_alias_table[i].flags & PTA_SSE4A
3282 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3283 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3284 if (processor_alias_table[i].flags & PTA_FMA4
3285 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3286 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3287 if (processor_alias_table[i].flags & PTA_XOP
3288 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3289 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3290 if (processor_alias_table[i].flags & PTA_LWP
3291 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3292 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3293 if (processor_alias_table[i].flags & PTA_ABM
3294 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3295 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3296 if (processor_alias_table[i].flags & PTA_BMI
3297 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3298 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3299 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3300 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3301 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3302 if (processor_alias_table[i].flags & PTA_TBM
3303 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3304 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3305 if (processor_alias_table[i].flags & PTA_BMI2
3306 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3307 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3308 if (processor_alias_table[i].flags & PTA_CX16
3309 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3310 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3311 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3312 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3313 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3314 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3315 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3316 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3317 if (processor_alias_table[i].flags & PTA_MOVBE
3318 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3319 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3320 if (processor_alias_table[i].flags & PTA_AES
3321 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3322 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3323 if (processor_alias_table[i].flags & PTA_PCLMUL
3324 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3325 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3326 if (processor_alias_table[i].flags & PTA_FSGSBASE
3327 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3328 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3329 if (processor_alias_table[i].flags & PTA_RDRND
3330 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3331 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3332 if (processor_alias_table[i].flags & PTA_F16C
3333 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3334 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3335 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3336 x86_prefetch_sse = true;
3341 if (!strcmp (ix86_arch_string, "generic"))
3342 error ("generic CPU can be used only for %stune=%s %s",
3343 prefix, suffix, sw);
3344 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3345 error ("bad value (%s) for %sarch=%s %s",
3346 ix86_arch_string, prefix, suffix, sw);
3348 ix86_arch_mask = 1u << ix86_arch;
3349 for (i = 0; i < X86_ARCH_LAST; ++i)
3350 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
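/* Editorial worked example: with ix86_arch == PROCESSOR_K6 the mask
   is 1u << PROCESSOR_K6, i.e. m_K6, so X86_ARCH_CMOVE comes out 0
   (m_K6 is excluded by ~(m_386 | m_486 | m_PENT | m_K6)), while the
   CMPXCHG/XADD/BSWAP entries, which exclude only the older chips,
   would come out 1.  */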
3352 for (i = 0; i < pta_size; i++)
3353 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3355 ix86_schedule = processor_alias_table[i].schedule;
3356 ix86_tune = processor_alias_table[i].processor;
3359 if (!(processor_alias_table[i].flags & PTA_64BIT))
3361 if (ix86_tune_defaulted)
3363 ix86_tune_string = "x86-64";
3364 for (i = 0; i < pta_size; i++)
3365 if (! strcmp (ix86_tune_string,
3366 processor_alias_table[i].name))
3368 ix86_schedule = processor_alias_table[i].schedule;
3369 ix86_tune = processor_alias_table[i].processor;
3372 error ("CPU you selected does not support x86-64 "
3378 /* Adjust tuning when compiling for 32-bit ABI. */
3381 case PROCESSOR_GENERIC64:
3382 ix86_tune = PROCESSOR_GENERIC32;
3383 ix86_schedule = CPU_PENTIUMPRO;
3386 case PROCESSOR_CORE2_64:
3387 ix86_tune = PROCESSOR_CORE2_32;
3390 case PROCESSOR_COREI7_64:
3391 ix86_tune = PROCESSOR_COREI7_32;
3398 /* Intel CPUs have always interpreted SSE prefetch instructions as
3399 NOPs; so, we can enable SSE prefetch instructions even when
3400 -mtune (rather than -march) points us to a processor that has them.
3401 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3402 higher processors. */
3404 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3405 x86_prefetch_sse = true;
3409 if (ix86_tune_specified && i == pta_size)
3410 error ("bad value (%s) for %stune=%s %s",
3411 ix86_tune_string, prefix, suffix, sw);
3413 ix86_tune_mask = 1u << ix86_tune;
3414 for (i = 0; i < X86_TUNE_LAST; ++i)
3415 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3417 #ifndef USE_IX86_FRAME_POINTER
3418 #define USE_IX86_FRAME_POINTER 0
3421 #ifndef USE_X86_64_FRAME_POINTER
3422 #define USE_X86_64_FRAME_POINTER 0
3425 /* Set the default values for switches whose default depends on TARGET_64BIT
3426 in case they weren't overridden by command-line options. */
3429 if (optimize > 1 && !global_options_set.x_flag_zee)
3431 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3432 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3433 if (flag_asynchronous_unwind_tables == 2)
3434 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3435 if (flag_pcc_struct_return == 2)
3436 flag_pcc_struct_return = 0;
3440 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3441 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3442 if (flag_asynchronous_unwind_tables == 2)
3443 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3444 if (flag_pcc_struct_return == 2)
3445 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
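/* Illustrative summary of the defaults above: with plain -O2 and no
   explicit -f options, 64-bit code omits the frame pointer unless the
   port was configured with USE_X86_64_FRAME_POINTER; 32-bit code also
   keeps the frame pointer under optimize_size, since %ebp-relative
   addressing is shorter than %esp-relative addressing.  */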
3449 ix86_cost = &ix86_size_cost;
3451 ix86_cost = processor_target_table[ix86_tune].cost;
3453 /* Arrange to set up i386_stack_locals for all functions. */
3454 init_machine_status = ix86_init_machine_status;
3456 /* Validate -mregparm= value. */
3457 if (global_options_set.x_ix86_regparm)
3460 warning (0, "-mregparm is ignored in 64-bit mode");
3461 if (ix86_regparm > REGPARM_MAX)
3463 error ("-mregparm=%d is not between 0 and %d",
3464 ix86_regparm, REGPARM_MAX);
3469 ix86_regparm = REGPARM_MAX;
3471 /* Default align_* from the processor table. */
3472 if (align_loops == 0)
3474 align_loops = processor_target_table[ix86_tune].align_loop;
3475 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3477 if (align_jumps == 0)
3479 align_jumps = processor_target_table[ix86_tune].align_jump;
3480 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3482 if (align_functions == 0)
3484 align_functions = processor_target_table[ix86_tune].align_func;
3487 /* Provide default for -mbranch-cost= value. */
3488 if (!global_options_set.x_ix86_branch_cost)
3489 ix86_branch_cost = ix86_cost->branch_cost;
3493 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3495 /* Enable by default the SSE and MMX builtins. Do allow the user to
3496 explicitly disable any of these. In particular, disabling SSE and
3497 MMX for kernel code is extremely useful. */
3498 if (!ix86_arch_specified)
3500 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3501 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3504 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3508 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3510 if (!ix86_arch_specified)
3512 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3514 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3515 when the programmer takes care to keep the stack from being destroyed. */
3516 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3517 target_flags |= MASK_NO_RED_ZONE;
3520 /* Keep nonleaf frame pointers. */
3521 if (flag_omit_frame_pointer)
3522 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3523 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3524 flag_omit_frame_pointer = 1;
3526 /* If we're doing fast math, we don't care about comparison order
3527 wrt NaNs. This lets us use a shorter comparison sequence. */
3528 if (flag_finite_math_only)
3529 target_flags &= ~MASK_IEEE_FP;
3531 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3532 since the insns won't need emulation. */
3533 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3534 target_flags &= ~MASK_NO_FANCY_MATH_387;
3536 /* Likewise, if the target doesn't have a 387, or we've specified
3537 software floating point, don't use 387 inline intrinsics. */
3539 target_flags |= MASK_NO_FANCY_MATH_387;
3541 /* Turn on MMX builtins for -msse. */
3544 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3545 x86_prefetch_sse = true;
3548 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3549 if (TARGET_SSE4_2 || TARGET_ABM)
3550 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3552 /* Turn on lzcnt instruction for -mabm. */
3554 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3556 /* Validate -mpreferred-stack-boundary= value or default it to
3557 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3558 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3559 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3561 int min = (TARGET_64BIT ? 4 : 2);
3562 int max = (TARGET_SEH ? 4 : 12);
3564 if (ix86_preferred_stack_boundary_arg < min
3565 || ix86_preferred_stack_boundary_arg > max)
3568 error ("-mpreferred-stack-boundary is not supported "
3571 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3572 ix86_preferred_stack_boundary_arg, min, max);
3575 ix86_preferred_stack_boundary
3576 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
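/* Worked example (illustrative): -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT == 16 * 8 == 128 bits, i.e. the 16-byte
   alignment the x86-64 psABI expects; the 32-bit minimum of 2 maps to
   4-byte alignment.  */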
3579 /* Set the default value for -mstackrealign. */
3580 if (ix86_force_align_arg_pointer == -1)
3581 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3583 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3585 /* Validate -mincoming-stack-boundary= value or default it to
3586 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3587 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3588 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3590 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3591 || ix86_incoming_stack_boundary_arg > 12)
3592 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3593 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3596 ix86_user_incoming_stack_boundary
3597 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3598 ix86_incoming_stack_boundary
3599 = ix86_user_incoming_stack_boundary;
3603 /* Accept -msseregparm only if at least SSE support is enabled. */
3604 if (TARGET_SSEREGPARM
3606 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3608 if (global_options_set.x_ix86_fpmath)
3610 if (ix86_fpmath & FPMATH_SSE)
3614 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3615 ix86_fpmath = FPMATH_387;
3617 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3619 warning (0, "387 instruction set disabled, using SSE arithmetics");
3620 ix86_fpmath = FPMATH_SSE;
3625 ix86_fpmath = TARGET_FPMATH_DEFAULT;
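/* Illustrative behavior of the fallbacks above: -mfpmath=sse without SSE
   warns and falls back to FPMATH_387; -mfpmath=387 without an 80387
   warns and falls back to FPMATH_SSE; with no explicit -mfpmath,
   TARGET_FPMATH_DEFAULT applies (SSE math on 64-bit targets with SSE,
   387 math otherwise).  */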
3627 /* If the i387 is disabled, then do not return values in it. */
3629 target_flags &= ~MASK_FLOAT_RETURNS;
3631 /* Use an external vectorized library for vectorizing intrinsics. */
3632 if (global_options_set.x_ix86_veclibabi_type)
3633 switch (ix86_veclibabi_type)
3635 case ix86_veclibabi_type_svml:
3636 ix86_veclib_handler = ix86_veclibabi_svml;
3639 case ix86_veclibabi_type_acml:
3640 ix86_veclib_handler = ix86_veclibabi_acml;
3647 if ((!USE_IX86_FRAME_POINTER
3648 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3649 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3651 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3653 /* ??? Unwind info is not correct around the CFG unless either a frame
3654 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3655 unwind info generation to be aware of the CFG and propagating states around edges. */
3657 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3658 || flag_exceptions || flag_non_call_exceptions)
3659 && flag_omit_frame_pointer
3660 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3662 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3663 warning (0, "unwind tables currently require either a frame pointer "
3664 "or %saccumulate-outgoing-args%s for correctness",
3666 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3669 /* If stack probes are required, the space used for large function
3670 arguments on the stack must also be probed, so enable
3671 -maccumulate-outgoing-args so this happens in the prologue. */
3672 if (TARGET_STACK_PROBE
3673 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3675 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3676 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3677 "for correctness", prefix, suffix);
3678 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3681 /* For sane SSE instruction set generation we need the fcomi instruction.
3682 It is safe to enable all CMOV instructions. Also, the RDRAND intrinsic
3683 expands to a sequence that includes conditional move. */
3684 if (TARGET_SSE || TARGET_RDRND)
3687 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3690 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3691 p = strchr (internal_label_prefix, 'X');
3692 internal_label_prefix_len = p - internal_label_prefix;
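/* Illustrative: on a typical ELF target ASM_GENERATE_INTERNAL_LABEL
   produces something like "*.LX0", so p points at the 'X' and the
   computed prefix is "*.L" with internal_label_prefix_len == 3.  The
   exact string is target-dependent.  */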
3696 /* When the scheduling description is not available, disable the scheduler
3697 pass so it won't slow down compilation and make x87 code slower. */
3698 if (!TARGET_SCHEDULE)
3699 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3701 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3702 ix86_cost->simultaneous_prefetches,
3703 global_options.x_param_values,
3704 global_options_set.x_param_values);
3705 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3706 global_options.x_param_values,
3707 global_options_set.x_param_values);
3708 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3709 global_options.x_param_values,
3710 global_options_set.x_param_values);
3711 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3712 global_options.x_param_values,
3713 global_options_set.x_param_values);
3715 /* Enable software prefetching at -O3 for CPUs where prefetching is beneficial. */
3716 if (flag_prefetch_loop_arrays < 0
3719 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3720 flag_prefetch_loop_arrays = 1;
3722 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3723 can be optimized to ap = __builtin_next_arg (0). */
3724 if (!TARGET_64BIT && !flag_split_stack)
3725 targetm.expand_builtin_va_start = NULL;
3729 ix86_gen_leave = gen_leave_rex64;
3730 ix86_gen_add3 = gen_adddi3;
3731 ix86_gen_sub3 = gen_subdi3;
3732 ix86_gen_sub3_carry = gen_subdi3_carry;
3733 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3734 ix86_gen_monitor = gen_sse3_monitor64;
3735 ix86_gen_andsp = gen_anddi3;
3736 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3737 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3738 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3742 ix86_gen_leave = gen_leave;
3743 ix86_gen_add3 = gen_addsi3;
3744 ix86_gen_sub3 = gen_subsi3;
3745 ix86_gen_sub3_carry = gen_subsi3_carry;
3746 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3747 ix86_gen_monitor = gen_sse3_monitor;
3748 ix86_gen_andsp = gen_andsi3;
3749 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3750 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3751 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
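/* These indirections let the otherwise word-size-agnostic prologue and
   epilogue code emit the right patterns; e.g. (illustrative)
   ix86_gen_add3 (sp, sp, offset) expands to an adddi3 insn in 64-bit
   mode and to an addsi3 insn in 32-bit mode.  */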
3755 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3757 target_flags |= MASK_CLD & ~target_flags_explicit;
3760 if (!TARGET_64BIT && flag_pic)
3762 if (flag_fentry > 0)
3763 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3767 else if (TARGET_SEH)
3769 if (flag_fentry == 0)
3770 sorry ("-mno-fentry isn%'t compatible with SEH");
3773 else if (flag_fentry < 0)
3775 #if defined(PROFILE_BEFORE_PROLOGUE)
3784 /* When not optimizing for size, enable the vzeroupper optimization for
3785 TARGET_AVX with -fexpensive-optimizations, and split 32-byte
3786 AVX unaligned loads/stores. */
3789 if (flag_expensive_optimizations
3790 && !(target_flags_explicit & MASK_VZEROUPPER))
3791 target_flags |= MASK_VZEROUPPER;
3792 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3793 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3794 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3795 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3796 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3797 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3798 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3799 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3800 target_flags |= MASK_PREFER_AVX128;
3805 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3806 target_flags &= ~MASK_VZEROUPPER;
3809 /* Save the initial options in case the user does function-specific options. */
3812 target_option_default_node = target_option_current_node
3813 = build_target_option_node ();
3816 /* Return TRUE if VAL is passed in a register with 256-bit AVX modes. */
3819 function_pass_avx256_p (const_rtx val)
3824 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3827 if (GET_CODE (val) == PARALLEL)
3832 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3834 r = XVECEXP (val, 0, i);
3835 if (GET_CODE (r) == EXPR_LIST
3837 && REG_P (XEXP (r, 0))
3838 && (GET_MODE (XEXP (r, 0)) == OImode
3839 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3847 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3850 ix86_option_override (void)
3852 ix86_option_override_internal (true);
3855 /* Update register usage after having seen the compiler flags. */
3858 ix86_conditional_register_usage (void)
3863 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3865 if (fixed_regs[i] > 1)
3866 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3867 if (call_used_regs[i] > 1)
3868 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3871 /* The PIC register, if it exists, is fixed. */
3872 j = PIC_OFFSET_TABLE_REGNUM;
3873 if (j != INVALID_REGNUM)
3874 fixed_regs[j] = call_used_regs[j] = 1;
3876 /* The 64-bit MS_ABI changes the set of call-used registers. */
3877 if (TARGET_64BIT_MS_ABI)
3879 call_used_regs[SI_REG] = 0;
3880 call_used_regs[DI_REG] = 0;
3881 call_used_regs[XMM6_REG] = 0;
3882 call_used_regs[XMM7_REG] = 0;
3883 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3884 call_used_regs[i] = 0;
3887 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3888 other call-clobbered regs for 64-bit. */
3891 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3893 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3894 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3895 && call_used_regs[i])
3896 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3899 /* If MMX is disabled, squash the registers. */
3901 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3902 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3903 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3905 /* If SSE is disabled, squash the registers. */
3907 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3908 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3909 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3911 /* If the FPU is disabled, squash the registers. */
3912 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3913 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3914 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3915 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3917 /* If 32-bit, squash the 64-bit registers. */
3920 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3922 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3928 /* Save the current options */
3931 ix86_function_specific_save (struct cl_target_option *ptr)
3933 ptr->arch = ix86_arch;
3934 ptr->schedule = ix86_schedule;
3935 ptr->tune = ix86_tune;
3936 ptr->branch_cost = ix86_branch_cost;
3937 ptr->tune_defaulted = ix86_tune_defaulted;
3938 ptr->arch_specified = ix86_arch_specified;
3939 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3940 ptr->ix86_target_flags_explicit = target_flags_explicit;
3942 /* The fields are char but the variables are not; make sure the
3943 values fit in the fields. */
3944 gcc_assert (ptr->arch == ix86_arch);
3945 gcc_assert (ptr->schedule == ix86_schedule);
3946 gcc_assert (ptr->tune == ix86_tune);
3947 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3950 /* Restore the current options */
3953 ix86_function_specific_restore (struct cl_target_option *ptr)
3955 enum processor_type old_tune = ix86_tune;
3956 enum processor_type old_arch = ix86_arch;
3957 unsigned int ix86_arch_mask, ix86_tune_mask;
3960 ix86_arch = (enum processor_type) ptr->arch;
3961 ix86_schedule = (enum attr_cpu) ptr->schedule;
3962 ix86_tune = (enum processor_type) ptr->tune;
3963 ix86_branch_cost = ptr->branch_cost;
3964 ix86_tune_defaulted = ptr->tune_defaulted;
3965 ix86_arch_specified = ptr->arch_specified;
3966 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
3967 target_flags_explicit = ptr->ix86_target_flags_explicit;
3969 /* Recreate the arch feature tests if the arch changed. */
3970 if (old_arch != ix86_arch)
3972 ix86_arch_mask = 1u << ix86_arch;
3973 for (i = 0; i < X86_ARCH_LAST; ++i)
3974 ix86_arch_features[i]
3975 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3978 /* Recreate the tune optimization tests. */
3979 if (old_tune != ix86_tune)
3981 ix86_tune_mask = 1u << ix86_tune;
3982 for (i = 0; i < X86_TUNE_LAST; ++i)
3983 ix86_tune_features[i]
3984 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3988 /* Print the current options */
3991 ix86_function_specific_print (FILE *file, int indent,
3992 struct cl_target_option *ptr)
3995 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
3996 NULL, NULL, ptr->x_ix86_fpmath, false);
3998 fprintf (file, "%*sarch = %d (%s)\n",
4001 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4002 ? cpu_names[ptr->arch]
4005 fprintf (file, "%*stune = %d (%s)\n",
4008 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4009 ? cpu_names[ptr->tune]
4012 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4016 fprintf (file, "%*s%s\n", indent, "", target_string);
4017 free (target_string);
4022 /* Inner function to process the attribute((target(...))); take an argument and
4023 set the current options from the argument. If we have a list, recursively go over the list. */
4027 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4028 struct gcc_options *enum_opts_set)
4033 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4034 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4035 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4036 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4037 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
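/* For instance (illustrative), IX86_ATTR_ISA ("avx", OPT_mavx) expands to
   { "avx", 3, ix86_opt_isa, OPT_mavx, 0 }: the option string, its length
   without the terminating NUL, the handler kind, the option enumerator,
   and an unused mask.  */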
4053 enum ix86_opt_type type;
4058 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4059 IX86_ATTR_ISA ("abm", OPT_mabm),
4060 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4061 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4062 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4063 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4064 IX86_ATTR_ISA ("aes", OPT_maes),
4065 IX86_ATTR_ISA ("avx", OPT_mavx),
4066 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4067 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4068 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4069 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4070 IX86_ATTR_ISA ("sse", OPT_msse),
4071 IX86_ATTR_ISA ("sse2", OPT_msse2),
4072 IX86_ATTR_ISA ("sse3", OPT_msse3),
4073 IX86_ATTR_ISA ("sse4", OPT_msse4),
4074 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4075 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4076 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4077 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4078 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4079 IX86_ATTR_ISA ("xop", OPT_mxop),
4080 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4081 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4082 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4083 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4086 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4088 /* string options */
4089 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4090 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4093 IX86_ATTR_YES ("cld",
4097 IX86_ATTR_NO ("fancy-math-387",
4098 OPT_mfancy_math_387,
4099 MASK_NO_FANCY_MATH_387),
4101 IX86_ATTR_YES ("ieee-fp",
4105 IX86_ATTR_YES ("inline-all-stringops",
4106 OPT_minline_all_stringops,
4107 MASK_INLINE_ALL_STRINGOPS),
4109 IX86_ATTR_YES ("inline-stringops-dynamically",
4110 OPT_minline_stringops_dynamically,
4111 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4113 IX86_ATTR_NO ("align-stringops",
4114 OPT_mno_align_stringops,
4115 MASK_NO_ALIGN_STRINGOPS),
4117 IX86_ATTR_YES ("recip",
4123 /* If this is a list, recurse to get the options. */
4124 if (TREE_CODE (args) == TREE_LIST)
4128 for (; args; args = TREE_CHAIN (args))
4129 if (TREE_VALUE (args)
4130 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4131 p_strings, enum_opts_set))
4137 else if (TREE_CODE (args) != STRING_CST)
4140 /* Handle multiple arguments separated by commas. */
4141 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4143 while (next_optstr && *next_optstr != '\0')
4145 char *p = next_optstr;
4147 char *comma = strchr (next_optstr, ',');
4148 const char *opt_string;
4149 size_t len, opt_len;
4154 enum ix86_opt_type type = ix86_opt_unknown;
4160 len = comma - next_optstr;
4161 next_optstr = comma + 1;
4169 /* Recognize no-xxx. */
4170 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4179 /* Find the option. */
4182 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4184 type = attrs[i].type;
4185 opt_len = attrs[i].len;
4186 if (ch == attrs[i].string[0]
4187 && ((type != ix86_opt_str && type != ix86_opt_enum)
4190 && memcmp (p, attrs[i].string, opt_len) == 0)
4193 mask = attrs[i].mask;
4194 opt_string = attrs[i].string;
4199 /* Process the option. */
4202 error ("attribute(target(\"%s\")) is unknown", orig_p);
4206 else if (type == ix86_opt_isa)
4208 struct cl_decoded_option decoded;
4210 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4211 ix86_handle_option (&global_options, &global_options_set,
4212 &decoded, input_location);
4215 else if (type == ix86_opt_yes || type == ix86_opt_no)
4217 if (type == ix86_opt_no)
4218 opt_set_p = !opt_set_p;
4221 target_flags |= mask;
4223 target_flags &= ~mask;
4226 else if (type == ix86_opt_str)
4230 error ("option(\"%s\") was already specified", opt_string);
4234 p_strings[opt] = xstrdup (p + opt_len);
4237 else if (type == ix86_opt_enum)
4242 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4244 set_option (&global_options, enum_opts_set, opt, value,
4245 p + opt_len, DK_UNSPECIFIED, input_location,
4249 error ("attribute(target(\"%s\")) is unknown", orig_p);
4261 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4264 ix86_valid_target_attribute_tree (tree args)
4266 const char *orig_arch_string = ix86_arch_string;
4267 const char *orig_tune_string = ix86_tune_string;
4268 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4269 int orig_tune_defaulted = ix86_tune_defaulted;
4270 int orig_arch_specified = ix86_arch_specified;
4271 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4274 struct cl_target_option *def
4275 = TREE_TARGET_OPTION (target_option_default_node);
4276 struct gcc_options enum_opts_set;
4278 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4280 /* Process each of the options on the chain. */
4281 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4285 /* If the changed options are different from the default, rerun
4286 ix86_option_override_internal, and then save the options away.
4287 The string options are attribute options, and will be undone
4288 when we copy the save structure. */
4289 if (ix86_isa_flags != def->x_ix86_isa_flags
4290 || target_flags != def->x_target_flags
4291 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4292 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4293 || enum_opts_set.x_ix86_fpmath)
4295 /* If we are using the default tune= or arch=, undo the string assigned,
4296 and use the default. */
4297 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4298 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4299 else if (!orig_arch_specified)
4300 ix86_arch_string = NULL;
4302 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4303 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4304 else if (orig_tune_defaulted)
4305 ix86_tune_string = NULL;
4307 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4308 if (enum_opts_set.x_ix86_fpmath)
4309 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4310 else if (!TARGET_64BIT && TARGET_SSE)
4312 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4313 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4316 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4317 ix86_option_override_internal (false);
4319 /* Add any builtin functions with the new isa if any. */
4320 ix86_add_new_builtins (ix86_isa_flags);
4322 /* Save the current options unless we are validating options for #pragma. */
4324 t = build_target_option_node ();
4326 ix86_arch_string = orig_arch_string;
4327 ix86_tune_string = orig_tune_string;
4328 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4330 /* Free up memory allocated to hold the strings */
4331 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4332 free (option_strings[i]);
4338 /* Hook to validate attribute((target("string"))). */
4341 ix86_valid_target_attribute_p (tree fndecl,
4342 tree ARG_UNUSED (name),
4344 int ARG_UNUSED (flags))
4346 struct cl_target_option cur_target;
4348 tree old_optimize = build_optimization_node ();
4349 tree new_target, new_optimize;
4350 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4352 /* If the function changed the optimization levels as well as setting target
4353 options, start with the optimizations specified. */
4354 if (func_optimize && func_optimize != old_optimize)
4355 cl_optimization_restore (&global_options,
4356 TREE_OPTIMIZATION (func_optimize));
4358 /* The target attributes may also change some optimization flags, so update
4359 the optimization options if necessary. */
4360 cl_target_option_save (&cur_target, &global_options);
4361 new_target = ix86_valid_target_attribute_tree (args);
4362 new_optimize = build_optimization_node ();
4369 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4371 if (old_optimize != new_optimize)
4372 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4375 cl_target_option_restore (&global_options, &cur_target);
4377 if (old_optimize != new_optimize)
4378 cl_optimization_restore (&global_options,
4379 TREE_OPTIMIZATION (old_optimize));
4385 /* Hook to determine if one function can safely inline another. */
4388 ix86_can_inline_p (tree caller, tree callee)
4391 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4392 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4394 /* If callee has no option attributes, then it is ok to inline. */
4398 /* If the caller has no option attributes but the callee does, then it is not ok to inline. */
4400 else if (!caller_tree)
4405 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4406 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4408 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
4409 function can inline an SSE2 function, but an SSE2 function can't inline an SSE4 function. */
4411 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4412 != callee_opts->x_ix86_isa_flags)
4415 /* See if we have the same non-isa options. */
4416 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4419 /* See if arch, tune, etc. are the same. */
4420 else if (caller_opts->arch != callee_opts->arch)
4423 else if (caller_opts->tune != callee_opts->tune)
4426 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4429 else if (caller_opts->branch_cost != callee_opts->branch_cost)
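/* Concrete consequence (illustrative): a caller compiled with
   __attribute__ ((target ("avx"))) may inline a plain -msse2 callee,
   since the callee's ISA bits are a subset of the caller's, but the
   reverse inlining is rejected by the subset test above.  */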
4440 /* Remember the last target of ix86_set_current_function. */
4441 static GTY(()) tree ix86_previous_fndecl;
4443 /* Establish appropriate back-end context for processing the function
4444 FNDECL. The argument might be NULL to indicate processing at top
4445 level, outside of any function scope. */
4447 ix86_set_current_function (tree fndecl)
4449 /* Only change the context if the function changes. This hook is called
4450 several times in the course of compiling a function, and we don't want to
4451 slow things down too much or call target_reinit when it isn't safe. */
4452 if (fndecl && fndecl != ix86_previous_fndecl)
4454 tree old_tree = (ix86_previous_fndecl
4455 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4458 tree new_tree = (fndecl
4459 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4462 ix86_previous_fndecl = fndecl;
4463 if (old_tree == new_tree)
4468 cl_target_option_restore (&global_options,
4469 TREE_TARGET_OPTION (new_tree));
4475 struct cl_target_option *def
4476 = TREE_TARGET_OPTION (target_option_current_node);
4478 cl_target_option_restore (&global_options, def);
4485 /* Return true if this goes in large data/bss. */
4488 ix86_in_large_data_p (tree exp)
4490 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4493 /* Functions are never large data. */
4494 if (TREE_CODE (exp) == FUNCTION_DECL)
4497 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4499 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4500 if (strcmp (section, ".ldata") == 0
4501 || strcmp (section, ".lbss") == 0)
4507 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4509 /* If this is an incomplete type with size 0, then we can't put it
4510 in data because it might be too big when completed. */
4511 if (!size || size > ix86_section_threshold)
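/* Illustrative: under -mcmodel=medium with the default
   -mlarge-data-threshold (65536), "static char buf[1 << 20];" counts as
   large data and is routed to .ldata/.lbss by the hooks below, while a
   small scalar stays in the ordinary .data/.bss sections.  */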
4518 /* Switch to the appropriate section for output of DECL.
4519 DECL is either a `VAR_DECL' node or a constant of some sort.
4520 RELOC indicates whether forming the initial value of DECL requires
4521 link-time relocations. */
4523 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4527 x86_64_elf_select_section (tree decl, int reloc,
4528 unsigned HOST_WIDE_INT align)
4530 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4531 && ix86_in_large_data_p (decl))
4533 const char *sname = NULL;
4534 unsigned int flags = SECTION_WRITE;
4535 switch (categorize_decl_for_section (decl, reloc))
4540 case SECCAT_DATA_REL:
4541 sname = ".ldata.rel";
4543 case SECCAT_DATA_REL_LOCAL:
4544 sname = ".ldata.rel.local";
4546 case SECCAT_DATA_REL_RO:
4547 sname = ".ldata.rel.ro";
4549 case SECCAT_DATA_REL_RO_LOCAL:
4550 sname = ".ldata.rel.ro.local";
4554 flags |= SECTION_BSS;
4557 case SECCAT_RODATA_MERGE_STR:
4558 case SECCAT_RODATA_MERGE_STR_INIT:
4559 case SECCAT_RODATA_MERGE_CONST:
4563 case SECCAT_SRODATA:
4570 /* We don't split these for the medium model. Place them into
4571 default sections and hope for the best. */
4576 /* We might get called with string constants, but get_named_section
4577 doesn't like them as they are not DECLs. Also, we need to set
4578 flags in that case. */
4580 return get_section (sname, flags, NULL);
4581 return get_named_section (decl, sname, reloc);
4584 return default_elf_select_section (decl, reloc, align);
4587 /* Build up a unique section name, expressed as a
4588 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4589 RELOC indicates whether the initial value of EXP requires
4590 link-time relocations. */
4592 static void ATTRIBUTE_UNUSED
4593 x86_64_elf_unique_section (tree decl, int reloc)
4595 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4596 && ix86_in_large_data_p (decl))
4598 const char *prefix = NULL;
4599 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4600 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4602 switch (categorize_decl_for_section (decl, reloc))
4605 case SECCAT_DATA_REL:
4606 case SECCAT_DATA_REL_LOCAL:
4607 case SECCAT_DATA_REL_RO:
4608 case SECCAT_DATA_REL_RO_LOCAL:
4609 prefix = one_only ? ".ld" : ".ldata";
4612 prefix = one_only ? ".lb" : ".lbss";
4615 case SECCAT_RODATA_MERGE_STR:
4616 case SECCAT_RODATA_MERGE_STR_INIT:
4617 case SECCAT_RODATA_MERGE_CONST:
4618 prefix = one_only ? ".lr" : ".lrodata";
4620 case SECCAT_SRODATA:
4627 /* We don't split these for the medium model. Place them into
4628 default sections and hope for the best. */
4633 const char *name, *linkonce;
4636 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4637 name = targetm.strip_name_encoding (name);
4639 /* If we're using one_only, then there needs to be a .gnu.linkonce
4640 prefix to the section name. */
4641 linkonce = one_only ? ".gnu.linkonce" : "";
4643 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4645 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4649 default_unique_section (decl, reloc);
4652 #ifdef COMMON_ASM_OP
4653 /* This says how to output assembler code to declare an
4654 uninitialized external linkage data object.
4656 For medium-model x86-64 we need to use the .largecomm opcode for large objects. */
4659 x86_elf_aligned_common (FILE *file,
4660 const char *name, unsigned HOST_WIDE_INT size,
4663 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4664 && size > (unsigned int)ix86_section_threshold)
4665 fputs (".largecomm\t", file);
4667 fputs (COMMON_ASM_OP, file);
4668 assemble_name (file, name);
4669 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4670 size, align / BITS_PER_UNIT);
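/* Sample emitted assembly (illustrative), for a 1 MiB object with
   32-byte alignment under -mcmodel=medium:

       .largecomm	big_buf,1048576,32

   Smaller objects fall through to the regular COMMON_ASM_OP.  */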
4674 /* Utility function for targets to use in implementing
4675 ASM_OUTPUT_ALIGNED_BSS. */
4678 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4679 const char *name, unsigned HOST_WIDE_INT size,
4682 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4683 && size > (unsigned int)ix86_section_threshold)
4684 switch_to_section (get_named_section (decl, ".lbss", 0));
4686 switch_to_section (bss_section);
4687 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4688 #ifdef ASM_DECLARE_OBJECT_NAME
4689 last_assemble_variable_decl = decl;
4690 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4692 /* The standard thing is just to output a label for the object. */
4693 ASM_OUTPUT_LABEL (file, name);
4694 #endif /* ASM_DECLARE_OBJECT_NAME */
4695 ASM_OUTPUT_SKIP (file, size ? size : 1);
4698 /* Decide whether we must probe the stack before any space allocation
4699 on this target. It's essentially TARGET_STACK_PROBE except when
4700 -fstack-check causes the stack to be already probed differently. */
4703 ix86_target_stack_probe (void)
4705 /* Do not probe the stack twice if static stack checking is enabled. */
4706 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4709 return TARGET_STACK_PROBE;
4712 /* Decide whether we can make a sibling call to a function. DECL is the
4713 declaration of the function being targeted by the call and EXP is the
4714 CALL_EXPR representing the call. */
4717 ix86_function_ok_for_sibcall (tree decl, tree exp)
4719 tree type, decl_or_type;
4722 /* If we are generating position-independent code, we cannot sibcall
4723 optimize any indirect call, or a direct call to a global function,
4724 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4728 && (!decl || !targetm.binds_local_p (decl)))
4731 /* If we need to align the outgoing stack, then sibcalling would
4732 unalign the stack, which may break the called function. */
4733 if (ix86_minimum_incoming_stack_boundary (true)
4734 < PREFERRED_STACK_BOUNDARY)
4739 decl_or_type = decl;
4740 type = TREE_TYPE (decl);
4744 /* We're looking at the CALL_EXPR, we need the type of the function. */
4745 type = CALL_EXPR_FN (exp); /* pointer expression */
4746 type = TREE_TYPE (type); /* pointer type */
4747 type = TREE_TYPE (type); /* function type */
4748 decl_or_type = type;
4751 /* Check that the return value locations are the same. For example, if
4752 we are returning floats on the 80387 register stack, we cannot
4753 make a sibcall from a function that doesn't return a float to a
4754 function that does or, conversely, from a function that does return
4755 a float to a function that doesn't; the necessary stack adjustment
4756 would not be executed. This is also the place we notice
4757 differences in the return value ABI. Note that it is ok for one
4758 of the functions to have void return type as long as the return
4759 value of the other is passed in a register. */
4760 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4761 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4763 if (STACK_REG_P (a) || STACK_REG_P (b))
4765 if (!rtx_equal_p (a, b))
4768 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4770 /* Disable sibcall if we need to generate vzeroupper after the callee returns. */
4772 if (TARGET_VZEROUPPER
4773 && cfun->machine->callee_return_avx256_p
4774 && !cfun->machine->caller_return_avx256_p)
4777 else if (!rtx_equal_p (a, b))
4782 /* The SYSV ABI has more call-clobbered registers;
4783 disallow sibcalls from MS to SYSV. */
4784 if (cfun->machine->call_abi == MS_ABI
4785 && ix86_function_type_abi (type) == SYSV_ABI)
4790 /* If this call is indirect, we'll need to be able to use a
4791 call-clobbered register for the address of the target function.
4792 Make sure that all such registers are not used for passing
4793 parameters. Note that DLLIMPORT functions are indirect. */
4795 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4797 if (ix86_function_regparm (type, NULL) >= 3)
4799 /* ??? Need to count the actual number of registers to be used,
4800 not the possible number of registers. Fix later. */
4806 /* Otherwise okay. That also includes certain types of indirect calls. */
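/* Illustrative failure case for the PIC rule above: in 32-bit -fpic code,

     extern int bar (int);
     int foo (int x) { return bar (x); }

   cannot use a sibcall when bar binds non-locally, because the call goes
   through the PLT and the PLT sequence requires %ebx to hold the GOT
   pointer.  */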
4810 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4811 and "sseregparm" calling convention attributes;
4812 arguments as in struct attribute_spec.handler. */
4815 ix86_handle_cconv_attribute (tree *node, tree name,
4817 int flags ATTRIBUTE_UNUSED,
4820 if (TREE_CODE (*node) != FUNCTION_TYPE
4821 && TREE_CODE (*node) != METHOD_TYPE
4822 && TREE_CODE (*node) != FIELD_DECL
4823 && TREE_CODE (*node) != TYPE_DECL)
4825 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4827 *no_add_attrs = true;
4831 /* Can combine regparm with all attributes but fastcall and thiscall. */
4832 if (is_attribute_p ("regparm", name))
4836 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4838 error ("fastcall and regparm attributes are not compatible");
4841 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4843 error ("regparam and thiscall attributes are not compatible");
4846 cst = TREE_VALUE (args);
4847 if (TREE_CODE (cst) != INTEGER_CST)
4849 warning (OPT_Wattributes,
4850 "%qE attribute requires an integer constant argument",
4852 *no_add_attrs = true;
4854 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4856 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4858 *no_add_attrs = true;
4866 /* Do not warn when emulating the MS ABI. */
4867 if ((TREE_CODE (*node) != FUNCTION_TYPE
4868 && TREE_CODE (*node) != METHOD_TYPE)
4869 || ix86_function_type_abi (*node) != MS_ABI)
4870 warning (OPT_Wattributes, "%qE attribute ignored",
4872 *no_add_attrs = true;
4876 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4877 if (is_attribute_p ("fastcall", name))
4879 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4881 error ("fastcall and cdecl attributes are not compatible");
4883 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4885 error ("fastcall and stdcall attributes are not compatible");
4887 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4889 error ("fastcall and regparm attributes are not compatible");
4891 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4893 error ("fastcall and thiscall attributes are not compatible");
4897 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
4899 else if (is_attribute_p ("stdcall", name))
4901 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4903 error ("stdcall and cdecl attributes are not compatible");
4905 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4907 error ("stdcall and fastcall attributes are not compatible");
4909 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4911 error ("stdcall and thiscall attributes are not compatible");
4915 /* Can combine cdecl with regparm and sseregparm. */
4916 else if (is_attribute_p ("cdecl", name))
4918 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4920 error ("stdcall and cdecl attributes are not compatible");
4922 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4924 error ("fastcall and cdecl attributes are not compatible");
4926 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4928 error ("cdecl and thiscall attributes are not compatible");
4931 else if (is_attribute_p ("thiscall", name))
4933 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4934 warning (OPT_Wattributes, "%qE attribute is used for none class-method",
4936 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4938 error ("stdcall and thiscall attributes are not compatible");
4940 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4942 error ("fastcall and thiscall attributes are not compatible");
4944 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4946 error ("cdecl and thiscall attributes are not compatible");
4950 /* Can combine sseregparm with all attributes. */
4955 /* This function determines the calling convention from TYPE. */
4958 ix86_get_callcvt (const_tree type)
4960 unsigned int ret = 0;
4965 return IX86_CALLCVT_CDECL;
4967 attrs = TYPE_ATTRIBUTES (type);
4968 if (attrs != NULL_TREE)
4970 if (lookup_attribute ("cdecl", attrs))
4971 ret |= IX86_CALLCVT_CDECL;
4972 else if (lookup_attribute ("stdcall", attrs))
4973 ret |= IX86_CALLCVT_STDCALL;
4974 else if (lookup_attribute ("fastcall", attrs))
4975 ret |= IX86_CALLCVT_FASTCALL;
4976 else if (lookup_attribute ("thiscall", attrs))
4977 ret |= IX86_CALLCVT_THISCALL;
4979 /* Regparm isn't allowed for thiscall and fastcall. */
4980 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
4982 if (lookup_attribute ("regparm", attrs))
4983 ret |= IX86_CALLCVT_REGPARM;
4984 if (lookup_attribute ("sseregparm", attrs))
4985 ret |= IX86_CALLCVT_SSEREGPARM;
4988 if (IX86_BASE_CALLCVT(ret) != 0)
4992 is_stdarg = stdarg_p (type);
4993 if (TARGET_RTD && !is_stdarg)
4994 return IX86_CALLCVT_STDCALL | ret;
4998 || TREE_CODE (type) != METHOD_TYPE
4999 || ix86_function_type_abi (type) != MS_ABI)
5000 return IX86_CALLCVT_CDECL | ret;
5002 return IX86_CALLCVT_THISCALL;
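/* Illustrative mapping: "void __attribute__ ((stdcall)) f (int);" yields
   IX86_CALLCVT_STDCALL; under -mrtd a non-stdarg function with no
   attribute is also treated as stdcall; everything else defaults to
   IX86_CALLCVT_CDECL, possibly or'ed with the regparm/sseregparm
   modifier bits.  */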
5005 /* Return 0 if the attributes for two types are incompatible, 1 if they
5006 are compatible, and 2 if they are nearly compatible (which causes a
5007 warning to be generated). */
5010 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5012 unsigned int ccvt1, ccvt2;
5014 if (TREE_CODE (type1) != FUNCTION_TYPE
5015 && TREE_CODE (type1) != METHOD_TYPE)
5018 ccvt1 = ix86_get_callcvt (type1);
5019 ccvt2 = ix86_get_callcvt (type2);
5022 if (ix86_function_regparm (type1, NULL)
5023 != ix86_function_regparm (type2, NULL))
5029 /* Return the regparm value for a function with the indicated TYPE and DECL.
5030 DECL may be NULL when calling the function indirectly
5031 or considering a libcall. */
5034 ix86_function_regparm (const_tree type, const_tree decl)
5041 return (ix86_function_type_abi (type) == SYSV_ABI
5042 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5043 ccvt = ix86_get_callcvt (type);
5044 regparm = ix86_regparm;
5046 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5048 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5051 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5055 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5057 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5060 /* Use register calling convention for local functions when possible. */
5062 && TREE_CODE (decl) == FUNCTION_DECL
5064 && !(profile_flag && !flag_fentry))
5066 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5067 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5068 if (i && i->local && i->can_change_signature)
5070 int local_regparm, globals = 0, regno;
5072 /* Make sure no regparm register is taken by a
5073 fixed register variable. */
5074 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5075 if (fixed_regs[local_regparm])
5078 /* We don't want to use regparm(3) for nested functions as
5079 these use a static chain pointer in the third argument. */
5080 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5083 /* In 32-bit mode save a register for the split stack. */
5084 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5087 /* Each fixed register usage increases register pressure,
5088 so fewer registers should be used for argument passing.
5089 This functionality can be overridden by an explicit regparm value. */
5091 for (regno = 0; regno <= DI_REG; regno++)
5092 if (fixed_regs[regno])
5096 = globals < local_regparm ? local_regparm - globals : 0;
5098 if (local_regparm > regparm)
5099 regparm = local_regparm;
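/* Worked example (illustrative): for a local 32-bit function with no
   fixed register variables, no static chain and no -fsplit-stack,
   globals == 0 and local_regparm == 3, so up to three integer arguments
   are passed in %eax, %edx and %ecx.  */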
5106 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5107 DFmode (2) arguments in SSE registers for a function with the
5108 indicated TYPE and DECL. DECL may be NULL when calling the function
5109 indirectly or considering a libcall. Otherwise return 0. */
5112 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5114 gcc_assert (!TARGET_64BIT);
5116 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5117 by the sseregparm attribute. */
5118 if (TARGET_SSEREGPARM
5119 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5126 error ("calling %qD with attribute sseregparm without "
5127 "SSE/SSE2 enabled", decl);
5129 error ("calling %qT with attribute sseregparm without "
5130 "SSE/SSE2 enabled", type);
5138 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5139 (and DFmode for SSE2) arguments in SSE registers. */
5140 if (decl && TARGET_SSE_MATH && optimize
5141 && !(profile_flag && !flag_fentry))
5143 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5144 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5145 if (i && i->local && i->can_change_signature)
5146 return TARGET_SSE2 ? 2 : 1;
5152 /* Return true if EAX is live at the start of the function. Used by
5153 ix86_expand_prologue to determine if we need special help before
5154 calling allocate_stack_worker. */
5157 ix86_eax_live_at_start_p (void)
5159 /* Cheat. Don't bother working forward from ix86_function_regparm
5160 to the function type to whether an actual argument is located in
5161 eax. Instead just look at cfg info, which is still close enough
5162 to correct at this point. This gives false positives for broken
5163 functions that might use uninitialized data that happens to be
5164 allocated in eax, but who cares? */
5165 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5169 ix86_keep_aggregate_return_pointer (tree fntype)
5175 attr = lookup_attribute ("callee_pop_aggregate_return",
5176 TYPE_ATTRIBUTES (fntype));
5178 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5180 /* For 32-bit MS-ABI the default is to keep the aggregate return pointer. */
5182 if (ix86_function_type_abi (fntype) == MS_ABI)
5185 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5188 /* Value is the number of bytes of arguments automatically
5189 popped when returning from a subroutine call.
5190 FUNDECL is the declaration node of the function (as a tree),
5191 FUNTYPE is the data type of the function (as a tree),
5192 or for a library call it is an identifier node for the subroutine name.
5193 SIZE is the number of bytes of arguments passed on the stack.
5195 On the 80386, the RTD insn may be used to pop them if the number
5196 of args is fixed, but if the number is variable then the caller
5197 must pop them all. RTD can't be used for library calls now
5198 because the library is compiled with the Unix compiler.
5199 Use of RTD is a selectable option, since it is incompatible with
5200 standard Unix calling sequences. If the option is not selected,
5201 the caller must always pop the args.
5203 The attribute stdcall is equivalent to RTD on a per module basis. */
5206 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5210 /* None of the 64-bit ABIs pop arguments. */
5214 ccvt = ix86_get_callcvt (funtype);
5216 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5217 | IX86_CALLCVT_THISCALL)) != 0
5218 && ! stdarg_p (funtype))
5221 /* Lose any fake structure return argument if it is passed on the stack. */
5222 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5223 && !ix86_keep_aggregate_return_pointer (funtype))
5225 int nregs = ix86_function_regparm (funtype, fundecl);
5227 return GET_MODE_SIZE (Pmode);
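/* Worked examples (illustrative): a 32-bit stdcall function taking two
   ints returns with "ret $8"; a cdecl function returning a large struct
   through the hidden pointer pops that pointer with "ret $4"
   (GET_MODE_SIZE (Pmode)) when the pointer is not kept; plain cdecl
   pops nothing.  */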
5233 /* Argument support functions. */
5235 /* Return true when REGNO may be used to pass function parameters. */
5237 ix86_function_arg_regno_p (int regno)
5240 const int *parm_regs;
5245 return (regno < REGPARM_MAX
5246 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5248 return (regno < REGPARM_MAX
5249 || (TARGET_MMX && MMX_REGNO_P (regno)
5250 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5251 || (TARGET_SSE && SSE_REGNO_P (regno)
5252 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5257 if (SSE_REGNO_P (regno) && TARGET_SSE)
5262 if (TARGET_SSE && SSE_REGNO_P (regno)
5263 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5267 /* TODO: The function should depend on current function ABI but
5268 builtins.c would need updating then. Therefore we use the default ABI. */
5271 /* RAX is used as a hidden argument to va_arg functions. */
5272 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5275 if (ix86_abi == MS_ABI)
5276 parm_regs = x86_64_ms_abi_int_parameter_registers;
5278 parm_regs = x86_64_int_parameter_registers;
5279 for (i = 0; i < (ix86_abi == MS_ABI
5280 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5281 if (regno == parm_regs[i])
5286 /* Return true if we do not know how to pass TYPE solely in registers. */
5289 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5291 if (must_pass_in_stack_var_size_or_pad (mode, type))
5294 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5295 The layout_type routine is crafty and tries to trick us into passing
5296 currently unsupported vector types on the stack by using TImode. */
5297 return (!TARGET_64BIT && mode == TImode
5298 && type && TREE_CODE (type) != VECTOR_TYPE);
5301 /* Return the size, in bytes, of the area reserved for arguments passed
5302 in registers for the function represented by FNDECL, depending on the ABI used. */
5305 ix86_reg_parm_stack_space (const_tree fndecl)
5307 enum calling_abi call_abi = SYSV_ABI;
5308 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5309 call_abi = ix86_function_abi (fndecl);
5311 call_abi = ix86_function_type_abi (fndecl);
5312 if (TARGET_64BIT && call_abi == MS_ABI)
5317 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the call ABI used. */
5320 ix86_function_type_abi (const_tree fntype)
5322 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5324 enum calling_abi abi = ix86_abi;
5325 if (abi == SYSV_ABI)
5327 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5330 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5338 ix86_function_ms_hook_prologue (const_tree fn)
5340 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5342 if (decl_function_context (fn) != NULL_TREE)
5343 error_at (DECL_SOURCE_LOCATION (fn),
5344 "ms_hook_prologue is not compatible with nested function");
5351 static enum calling_abi
5352 ix86_function_abi (const_tree fndecl)
5356 return ix86_function_type_abi (TREE_TYPE (fndecl));
5359 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the call ABI used. */
5362 ix86_cfun_abi (void)
5366 return cfun->machine->call_abi;
5369 /* Write the extra assembler code needed to declare a function properly. */
5372 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5375 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5379 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5380 unsigned int filler_cc = 0xcccccccc;
5382 for (i = 0; i < filler_count; i += 4)
5383 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5386 #ifdef SUBTARGET_ASM_UNWIND_INIT
5387 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5390 ASM_OUTPUT_LABEL (asm_out_file, fname);
5392 /* Output magic byte marker, if hot-patch attribute is set. */
5397 /* leaq [%rsp + 0], %rsp */
5398 asm_fprintf (asm_out_file, ASM_BYTE
5399 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5403 /* movl.s %edi, %edi
     push   %ebp
5405 movl.s %esp, %ebp */
5406 asm_fprintf (asm_out_file, ASM_BYTE
5407 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5413 extern void init_regs (void);
5415 /* Implementation of the call ABI switching target hook. The call register
5416 sets specific to FNDECL are set up. See also
5417 ix86_conditional_register_usage for more details. */
5419 ix86_call_abi_override (const_tree fndecl)
5421 if (fndecl == NULL_TREE)
5422 cfun->machine->call_abi = ix86_abi;
5424 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5427 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
5428 Avoid expensive re-initialization of init_regs each time we switch function
5429 context, since it is needed only during RTL expansion. */
5431 ix86_maybe_switch_abi (void)
5434 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5438 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5439 for a call to a function whose data type is FNTYPE.
5440 For a library call, FNTYPE is 0. */
5443 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5444 tree fntype, /* tree ptr for function decl */
5445 rtx libname, /* SYMBOL_REF of library name or 0 */
5449 struct cgraph_local_info *i;
5452 memset (cum, 0, sizeof (*cum));
5454 /* Initialize for the current callee. */
5457 cfun->machine->callee_pass_avx256_p = false;
5458 cfun->machine->callee_return_avx256_p = false;
5463 i = cgraph_local_info (fndecl);
5464 cum->call_abi = ix86_function_abi (fndecl);
5465 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5470 cum->call_abi = ix86_function_type_abi (fntype);
5472 fnret_type = TREE_TYPE (fntype);
5477 if (TARGET_VZEROUPPER && fnret_type)
5479 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5481 if (function_pass_avx256_p (fnret_value))
5483 /* The return value of this function uses 256bit AVX modes. */
5485 cfun->machine->callee_return_avx256_p = true;
5487 cfun->machine->caller_return_avx256_p = true;
5491 cum->caller = caller;
5493 /* Set up the number of registers to use for passing arguments. */
5495 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5496 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5497 "or subtarget optimization implying it");
5498 cum->nregs = ix86_regparm;
5501 cum->nregs = (cum->call_abi == SYSV_ABI
5502 ? X86_64_REGPARM_MAX
5503 : X86_64_MS_REGPARM_MAX);
5507 cum->sse_nregs = SSE_REGPARM_MAX;
5510 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5511 ? X86_64_SSE_REGPARM_MAX
5512 : X86_64_MS_SSE_REGPARM_MAX);
5516 cum->mmx_nregs = MMX_REGPARM_MAX;
5517 cum->warn_avx = true;
5518 cum->warn_sse = true;
5519 cum->warn_mmx = true;
5521 /* Because the type might mismatch between caller and callee, we need to
5522 use the actual type of the function for local calls.
5523 FIXME: cgraph_analyze can be told to actually record if a function uses
5524 va_start, so for local functions maybe_vaarg can be made more aggressive.
5526 FIXME: once the type system is fixed, we won't need this code anymore. */
5527 if (i && i->local && i->can_change_signature)
5528 fntype = TREE_TYPE (fndecl);
5529 cum->maybe_vaarg = (fntype
5530 ? (!prototype_p (fntype) || stdarg_p (fntype))
5535 /* If there are variable arguments, then we won't pass anything
5536 in registers in 32-bit mode. */
5537 if (stdarg_p (fntype))
5548 /* Use ecx and edx registers if function has fastcall attribute,
5549 else look for regparm information. */
5552 unsigned int ccvt = ix86_get_callcvt (fntype);
5553 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5556 cum->fastcall = 1; /* Same first register as in fastcall. */
5558 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5564 cum->nregs = ix86_function_regparm (fntype, fndecl);
5567 /* Set up the number of SSE registers used for passing SFmode
5568 and DFmode arguments. Warn for mismatching ABI. */
5569 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
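/* Illustrative sketch of the 32-bit conventions set up above (these
   declarations are not part of this file): with fastcall the first two
   DWORD-or-smaller arguments land in ECX and EDX,

     void __attribute__ ((fastcall)) f (int a, int b, int c);
     // a -> %ecx, b -> %edx, c -> stack

   while thiscall reserves only ECX, for the implicit this pointer. */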
5573 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5574 But in the case of vector types, it is some vector mode.
5576 When we have only some of our vector isa extensions enabled, then there
5577 are some modes for which vector_mode_supported_p is false. For these
5578 modes, the generic vector support in gcc will choose some non-vector mode
5579 in order to implement the type. By computing the natural mode, we'll
5580 select the proper ABI location for the operand and not depend on whatever
5581 the middle-end decides to do with these vector types.
   The middle-end can't deal with vector types larger than 16 bytes.  In
   this case, we return the original mode and warn about an ABI change if
   CUM isn't NULL. */
5587 static enum machine_mode
5588 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5590 enum machine_mode mode = TYPE_MODE (type);
5592 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5594 HOST_WIDE_INT size = int_size_in_bytes (type);
5595 if ((size == 8 || size == 16 || size == 32)
5596 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5597 && TYPE_VECTOR_SUBPARTS (type) > 1)
5599 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5601 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5602 mode = MIN_MODE_VECTOR_FLOAT;
5604 mode = MIN_MODE_VECTOR_INT;
5606 /* Get the mode which has this inner mode and number of units. */
5607 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5608 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5609 && GET_MODE_INNER (mode) == innermode)
5611 if (size == 32 && !TARGET_AVX)
5613 static bool warnedavx;
5620 warning (0, "AVX vector argument without AVX "
5621 "enabled changes the ABI");
5623 return TYPE_MODE (type);
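/* Example of the above (a sketch, assuming a generic vector type):

     typedef int v4si __attribute__ ((vector_size (16)));

   With SSE enabled, v4si already has V4SImode and is used as-is.  If
   SSE were disabled, the middle-end would have lowered the type to a
   non-vector mode, but the loop above still recovers V4SImode so the
   argument is classified at a stable ABI location. */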
5636 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5637 this may not agree with the mode that the type system has chosen for the
5638 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5639 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5642 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5647 if (orig_mode != BLKmode)
5648 tmp = gen_rtx_REG (orig_mode, regno);
5651 tmp = gen_rtx_REG (mode, regno);
5652 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5653 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
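/* Sketch of the BLKmode case above: for a V2SFmode value whose type
   ended up as BLKmode, the result is a one-element PARALLEL placing
   the register at byte offset 0, roughly

     (parallel:BLK [(expr_list (reg:V2SF mm0) (const_int 0))])

   which tells the middle-end how the BLKmode value maps onto the
   hard register. */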
5659 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5660 of this code is to classify each 8bytes of incoming argument by the register
5661 class and assign registers accordingly. */
5663 /* Return the union class of CLASS1 and CLASS2.
5664 See the x86-64 PS ABI for details. */
5666 static enum x86_64_reg_class
5667 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5669 /* Rule #1: If both classes are equal, this is the resulting class. */
5670 if (class1 == class2)
/* Rule #2: If one of the classes is NO_CLASS, the resulting class is
   the other class. */
5675 if (class1 == X86_64_NO_CLASS)
5677 if (class2 == X86_64_NO_CLASS)
5680 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5681 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5682 return X86_64_MEMORY_CLASS;
5684 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5685 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5686 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5687 return X86_64_INTEGERSI_CLASS;
5688 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5689 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5690 return X86_64_INTEGER_CLASS;
/* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
   MEMORY is used. */
5694 if (class1 == X86_64_X87_CLASS
5695 || class1 == X86_64_X87UP_CLASS
5696 || class1 == X86_64_COMPLEX_X87_CLASS
5697 || class2 == X86_64_X87_CLASS
5698 || class2 == X86_64_X87UP_CLASS
5699 || class2 == X86_64_COMPLEX_X87_CLASS)
5700 return X86_64_MEMORY_CLASS;
5702 /* Rule #6: Otherwise class SSE is used. */
5703 return X86_64_SSE_CLASS;
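/* A few worked examples of the rules above (sketch):

     merge_classes (X86_64_NO_CLASS, X86_64_SSE_CLASS)       -> SSE       (#2)
     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)
							      -> INTEGERSI (#4)
     merge_classes (X86_64_INTEGER_CLASS, X86_64_SSE_CLASS)  -> INTEGER   (#4)
     merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)      -> MEMORY    (#5)  */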
5706 /* Classify the argument of type TYPE and mode MODE.
5707 CLASSES will be filled by the register class used to pass each word
5708 of the operand. The number of words is returned. In case the parameter
5709 should be passed in memory, 0 is returned. As a special case for zero
5710 sized containers, classes[0] will be NO_CLASS and 1 is returned.
   BIT_OFFSET is used internally for handling records and specifies the
   offset of the argument in bits modulo 256 to avoid overflow cases.
5715 See the x86-64 PS ABI for details.
5719 classify_argument (enum machine_mode mode, const_tree type,
5720 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5722 HOST_WIDE_INT bytes =
5723 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5724 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5726 /* Variable sized entities are always passed/returned in memory. */
5730 if (mode != VOIDmode
5731 && targetm.calls.must_pass_in_stack (mode, type))
5734 if (type && AGGREGATE_TYPE_P (type))
5738 enum x86_64_reg_class subclasses[MAX_CLASSES];
5740 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5744 for (i = 0; i < words; i++)
5745 classes[i] = X86_64_NO_CLASS;
/* Zero sized arrays or structures are NO_CLASS.  We return 0 to
   signal memory class, so handle it as a special case. */
5751 classes[0] = X86_64_NO_CLASS;
5755 /* Classify each field of record and merge classes. */
5756 switch (TREE_CODE (type))
/* And now merge the fields of the structure. */
5760 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5762 if (TREE_CODE (field) == FIELD_DECL)
5766 if (TREE_TYPE (field) == error_mark_node)
5769 /* Bitfields are always classified as integer. Handle them
5770 early, since later code would consider them to be
5771 misaligned integers. */
5772 if (DECL_BIT_FIELD (field))
5774 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5775 i < ((int_bit_position (field) + (bit_offset % 64))
5776 + tree_low_cst (DECL_SIZE (field), 0)
5779 merge_classes (X86_64_INTEGER_CLASS,
5786 type = TREE_TYPE (field);
5788 /* Flexible array member is ignored. */
5789 if (TYPE_MODE (type) == BLKmode
5790 && TREE_CODE (type) == ARRAY_TYPE
5791 && TYPE_SIZE (type) == NULL_TREE
5792 && TYPE_DOMAIN (type) != NULL_TREE
5793 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5798 if (!warned && warn_psabi)
5801 inform (input_location,
5802 "the ABI of passing struct with"
5803 " a flexible array member has"
5804 " changed in GCC 4.4");
5808 num = classify_argument (TYPE_MODE (type), type,
5810 (int_bit_position (field)
5811 + bit_offset) % 256);
5814 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5815 for (i = 0; i < num && (i + pos) < words; i++)
5817 merge_classes (subclasses[i], classes[i + pos]);
5824 /* Arrays are handled as small records. */
5827 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5828 TREE_TYPE (type), subclasses, bit_offset);
5832 /* The partial classes are now full classes. */
5833 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5834 subclasses[0] = X86_64_SSE_CLASS;
5835 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5836 && !((bit_offset % 64) == 0 && bytes == 4))
5837 subclasses[0] = X86_64_INTEGER_CLASS;
5839 for (i = 0; i < words; i++)
5840 classes[i] = subclasses[i % num];
5845 case QUAL_UNION_TYPE:
5846 /* Unions are similar to RECORD_TYPE but offset is always 0.
5848 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5850 if (TREE_CODE (field) == FIELD_DECL)
5854 if (TREE_TYPE (field) == error_mark_node)
5857 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5858 TREE_TYPE (field), subclasses,
5862 for (i = 0; i < num; i++)
5863 classes[i] = merge_classes (subclasses[i], classes[i]);
/* When size > 16 bytes, if the first eightbyte isn't
   X86_64_SSE_CLASS or any other one isn't
   X86_64_SSEUP_CLASS, everything should be passed in
   memory. */
5878 if (classes[0] != X86_64_SSE_CLASS)
5881 for (i = 1; i < words; i++)
5882 if (classes[i] != X86_64_SSEUP_CLASS)
5886 /* Final merger cleanup. */
5887 for (i = 0; i < words; i++)
/* If one class is MEMORY, everything should be passed in
   memory. */
5891 if (classes[i] == X86_64_MEMORY_CLASS)
/* The X86_64_SSEUP_CLASS should always be preceded by
   X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5896 if (classes[i] == X86_64_SSEUP_CLASS
5897 && classes[i - 1] != X86_64_SSE_CLASS
5898 && classes[i - 1] != X86_64_SSEUP_CLASS)
5900 /* The first one should never be X86_64_SSEUP_CLASS. */
5901 gcc_assert (i != 0);
5902 classes[i] = X86_64_SSE_CLASS;
5905 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5906 everything should be passed in memory. */
5907 if (classes[i] == X86_64_X87UP_CLASS
5908 && (classes[i - 1] != X86_64_X87_CLASS))
5912 /* The first one should never be X86_64_X87UP_CLASS. */
5913 gcc_assert (i != 0);
5914 if (!warned && warn_psabi)
5917 inform (input_location,
5918 "the ABI of passing union with long double"
5919 " has changed in GCC 4.4");
/* Compute the alignment needed.  We align all types to their natural
   boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5929 if (mode != VOIDmode && mode != BLKmode)
5931 int mode_alignment = GET_MODE_BITSIZE (mode);
5934 mode_alignment = 128;
5935 else if (mode == XCmode)
5936 mode_alignment = 256;
5937 if (COMPLEX_MODE_P (mode))
5938 mode_alignment /= 2;
5939 /* Misaligned fields are always returned in memory. */
5940 if (bit_offset % mode_alignment)
/* For V1xx modes, just use the base mode. */
5945 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5946 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5947 mode = GET_MODE_INNER (mode);
5949 /* Classification of atomic types. */
5954 classes[0] = X86_64_SSE_CLASS;
5957 classes[0] = X86_64_SSE_CLASS;
5958 classes[1] = X86_64_SSEUP_CLASS;
int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5972 classes[0] = X86_64_INTEGERSI_CLASS;
5975 else if (size <= 64)
5977 classes[0] = X86_64_INTEGER_CLASS;
5980 else if (size <= 64+32)
5982 classes[0] = X86_64_INTEGER_CLASS;
5983 classes[1] = X86_64_INTEGERSI_CLASS;
5986 else if (size <= 64+64)
5988 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5996 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6000 /* OImode shouldn't be used directly. */
6005 if (!(bit_offset % 64))
6006 classes[0] = X86_64_SSESF_CLASS;
6008 classes[0] = X86_64_SSE_CLASS;
6011 classes[0] = X86_64_SSEDF_CLASS;
6014 classes[0] = X86_64_X87_CLASS;
6015 classes[1] = X86_64_X87UP_CLASS;
6018 classes[0] = X86_64_SSE_CLASS;
6019 classes[1] = X86_64_SSEUP_CLASS;
6022 classes[0] = X86_64_SSE_CLASS;
6023 if (!(bit_offset % 64))
6029 if (!warned && warn_psabi)
6032 inform (input_location,
6033 "the ABI of passing structure with complex float"
6034 " member has changed in GCC 4.4");
6036 classes[1] = X86_64_SSESF_CLASS;
6040 classes[0] = X86_64_SSEDF_CLASS;
6041 classes[1] = X86_64_SSEDF_CLASS;
6044 classes[0] = X86_64_COMPLEX_X87_CLASS;
/* This mode is larger than 16 bytes. */
6055 classes[0] = X86_64_SSE_CLASS;
6056 classes[1] = X86_64_SSEUP_CLASS;
6057 classes[2] = X86_64_SSEUP_CLASS;
6058 classes[3] = X86_64_SSEUP_CLASS;
6066 classes[0] = X86_64_SSE_CLASS;
6067 classes[1] = X86_64_SSEUP_CLASS;
6075 classes[0] = X86_64_SSE_CLASS;
6081 gcc_assert (VECTOR_MODE_P (mode));
6086 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6088 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6089 classes[0] = X86_64_INTEGERSI_CLASS;
6091 classes[0] = X86_64_INTEGER_CLASS;
6092 classes[1] = X86_64_INTEGER_CLASS;
6093 return 1 + (bytes > 8);
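/* Illustrative classifications under the SysV x86-64 ABI (a sketch;
   none of these declarations are part of this file):

     struct s1 { long l; double d; };	// -> { INTEGER, SSEDF }
     __int128 i;			// -> { INTEGER, INTEGER }
     long double ld;			// -> { X87, X87UP }
     struct s2 { char c[20]; };		// > 16 bytes -> memory (returns 0)

   With AVX enabled, a 32-byte vector such as __m256 classifies as
   { SSE, SSEUP, SSEUP, SSEUP }. */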
/* Examine the argument and return the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory. */
6100 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6101 int *int_nregs, int *sse_nregs)
6103 enum x86_64_reg_class regclass[MAX_CLASSES];
6104 int n = classify_argument (mode, type, regclass, 0);
6110 for (n--; n >= 0; n--)
6111 switch (regclass[n])
6113 case X86_64_INTEGER_CLASS:
6114 case X86_64_INTEGERSI_CLASS:
6117 case X86_64_SSE_CLASS:
6118 case X86_64_SSESF_CLASS:
6119 case X86_64_SSEDF_CLASS:
6122 case X86_64_NO_CLASS:
6123 case X86_64_SSEUP_CLASS:
6125 case X86_64_X87_CLASS:
6126 case X86_64_X87UP_CLASS:
6130 case X86_64_COMPLEX_X87_CLASS:
6131 return in_return ? 2 : 0;
6132 case X86_64_MEMORY_CLASS:
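/* Example (sketch): for struct s1 { long l; double d; } above,
   examine_argument yields *int_nregs = 1 and *sse_nregs = 1, so the
   struct needs one GPR and one SSE register; a long double argument
   yields 0 (passed in memory), while as a return value it is allowed
   and comes back in st(0). */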
6138 /* Construct container for the argument used by GCC interface. See
6139 FUNCTION_ARG for the detailed description. */
6142 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6143 const_tree type, int in_return, int nintregs, int nsseregs,
6144 const int *intreg, int sse_regno)
6146 /* The following variables hold the static issued_error state. */
6147 static bool issued_sse_arg_error;
6148 static bool issued_sse_ret_error;
6149 static bool issued_x87_ret_error;
6151 enum machine_mode tmpmode;
6153 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6154 enum x86_64_reg_class regclass[MAX_CLASSES];
6158 int needed_sseregs, needed_intregs;
6159 rtx exp[MAX_CLASSES];
6162 n = classify_argument (mode, type, regclass, 0);
6165 if (!examine_argument (mode, type, in_return, &needed_intregs,
6168 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6171 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6172 some less clueful developer tries to use floating-point anyway. */
6173 if (needed_sseregs && !TARGET_SSE)
6177 if (!issued_sse_ret_error)
6179 error ("SSE register return with SSE disabled");
6180 issued_sse_ret_error = true;
6183 else if (!issued_sse_arg_error)
6185 error ("SSE register argument with SSE disabled");
6186 issued_sse_arg_error = true;
6191 /* Likewise, error if the ABI requires us to return values in the
6192 x87 registers and the user specified -mno-80387. */
6193 if (!TARGET_80387 && in_return)
6194 for (i = 0; i < n; i++)
6195 if (regclass[i] == X86_64_X87_CLASS
6196 || regclass[i] == X86_64_X87UP_CLASS
6197 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6199 if (!issued_x87_ret_error)
6201 error ("x87 register return with x87 disabled");
6202 issued_x87_ret_error = true;
/* First construct the simple cases.  Avoid SCmode, since we want to use
   a single register to pass this type. */
6209 if (n == 1 && mode != SCmode)
6210 switch (regclass[0])
6212 case X86_64_INTEGER_CLASS:
6213 case X86_64_INTEGERSI_CLASS:
6214 return gen_rtx_REG (mode, intreg[0]);
6215 case X86_64_SSE_CLASS:
6216 case X86_64_SSESF_CLASS:
6217 case X86_64_SSEDF_CLASS:
6218 if (mode != BLKmode)
6219 return gen_reg_or_parallel (mode, orig_mode,
6220 SSE_REGNO (sse_regno));
6222 case X86_64_X87_CLASS:
6223 case X86_64_COMPLEX_X87_CLASS:
6224 return gen_rtx_REG (mode, FIRST_STACK_REG);
6225 case X86_64_NO_CLASS:
6226 /* Zero sized array, struct or class. */
6231 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6232 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6233 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6235 && regclass[0] == X86_64_SSE_CLASS
6236 && regclass[1] == X86_64_SSEUP_CLASS
6237 && regclass[2] == X86_64_SSEUP_CLASS
6238 && regclass[3] == X86_64_SSEUP_CLASS
6240 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6243 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6244 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6245 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6246 && regclass[1] == X86_64_INTEGER_CLASS
6247 && (mode == CDImode || mode == TImode || mode == TFmode)
6248 && intreg[0] + 1 == intreg[1])
6249 return gen_rtx_REG (mode, intreg[0]);
6251 /* Otherwise figure out the entries of the PARALLEL. */
6252 for (i = 0; i < n; i++)
6256 switch (regclass[i])
6258 case X86_64_NO_CLASS:
6260 case X86_64_INTEGER_CLASS:
6261 case X86_64_INTEGERSI_CLASS:
6262 /* Merge TImodes on aligned occasions here too. */
6263 if (i * 8 + 8 > bytes)
6264 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6265 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
/* We've requested 24 bytes for which we don't have a mode.  Use DImode. */
6270 if (tmpmode == BLKmode)
6272 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6273 gen_rtx_REG (tmpmode, *intreg),
6277 case X86_64_SSESF_CLASS:
6278 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6279 gen_rtx_REG (SFmode,
6280 SSE_REGNO (sse_regno)),
6284 case X86_64_SSEDF_CLASS:
6285 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6286 gen_rtx_REG (DFmode,
6287 SSE_REGNO (sse_regno)),
6291 case X86_64_SSE_CLASS:
6299 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6309 && regclass[1] == X86_64_SSEUP_CLASS
6310 && regclass[2] == X86_64_SSEUP_CLASS
6311 && regclass[3] == X86_64_SSEUP_CLASS);
6318 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6319 gen_rtx_REG (tmpmode,
6320 SSE_REGNO (sse_regno)),
6329 /* Empty aligned struct, union or class. */
6333 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6334 for (i = 0; i < nexps; i++)
6335 XVECEXP (ret, 0, i) = exp [i];
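/* Example result (sketch): for struct s1 { long l; double d; } passed
   in registers, construct_container builds roughly

     (parallel [(expr_list (reg:DI di)   (const_int 0))
		(expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the first eightbyte in a GPR and the second in an SSE register,
   each tagged with its byte offset within the struct. */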
6339 /* Update the data in CUM to advance over an argument of mode MODE
6340 and data type TYPE. (TYPE is null for libcalls where that information
6341 may not be available.) */
6344 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6345 const_tree type, HOST_WIDE_INT bytes,
6346 HOST_WIDE_INT words)
6362 cum->words += words;
6363 cum->nregs -= words;
6364 cum->regno += words;
6366 if (cum->nregs <= 0)
6374 /* OImode shouldn't be used directly. */
6378 if (cum->float_in_sse < 2)
6381 if (cum->float_in_sse < 1)
6398 if (!type || !AGGREGATE_TYPE_P (type))
6400 cum->sse_words += words;
6401 cum->sse_nregs -= 1;
6402 cum->sse_regno += 1;
6403 if (cum->sse_nregs <= 0)
6417 if (!type || !AGGREGATE_TYPE_P (type))
6419 cum->mmx_words += words;
6420 cum->mmx_nregs -= 1;
6421 cum->mmx_regno += 1;
6422 if (cum->mmx_nregs <= 0)
6433 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6434 const_tree type, HOST_WIDE_INT words, bool named)
6436 int int_nregs, sse_nregs;
/* Unnamed 256bit vector mode parameters are passed on the stack. */
6439 if (!named && VALID_AVX256_REG_MODE (mode))
6442 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6443 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6445 cum->nregs -= int_nregs;
6446 cum->sse_nregs -= sse_nregs;
6447 cum->regno += int_nregs;
6448 cum->sse_regno += sse_nregs;
6452 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6453 cum->words = (cum->words + align - 1) & ~(align - 1);
6454 cum->words += words;
6459 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6460 HOST_WIDE_INT words)
/* Otherwise, this should be passed indirectly. */
6463 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6465 cum->words += words;
6473 /* Update the data in CUM to advance over an argument of mode MODE and
6474 data type TYPE. (TYPE is null for libcalls where that information
6475 may not be available.) */
6478 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6479 const_tree type, bool named)
6481 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6482 HOST_WIDE_INT bytes, words;
6484 if (mode == BLKmode)
6485 bytes = int_size_in_bytes (type);
6487 bytes = GET_MODE_SIZE (mode);
6488 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6491 mode = type_natural_mode (type, NULL);
6493 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6494 function_arg_advance_ms_64 (cum, bytes, words);
6495 else if (TARGET_64BIT)
6496 function_arg_advance_64 (cum, mode, type, words, named);
6498 function_arg_advance_32 (cum, mode, type, bytes, words);
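/* Sketch of the 64-bit bookkeeping above: for f (int a, double b, int c)
   under the SysV ABI, advancing over A consumes one GPR (cum->nregs
   6 -> 5), over B one SSE register (cum->sse_nregs 8 -> 7), and over C
   another GPR, so C still lands in a register rather than on the
   stack. */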
6501 /* Define where to put the arguments to a function.
6502 Value is zero to push the argument on the stack,
6503 or a hard register in which to store the argument.
6505 MODE is the argument's machine mode.
6506 TYPE is the data type of the argument (as a tree).
6507 This is null for libcalls where that information may
6509 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6510 the preceding args and about the function being called.
6511 NAMED is nonzero if this argument is a named parameter
6512 (otherwise it is an extra parameter matching an ellipsis). */
6515 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6516 enum machine_mode orig_mode, const_tree type,
6517 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6519 static bool warnedsse, warnedmmx;
6521 /* Avoid the AL settings for the Unix64 ABI. */
6522 if (mode == VOIDmode)
6538 if (words <= cum->nregs)
6540 int regno = cum->regno;
/* Fastcall allocates the first two DWORD (SImode) or
   smaller arguments to ECX and EDX if it isn't an
   aggregate type. */
6549 || (type && AGGREGATE_TYPE_P (type)))
/* ECX, not EAX, is the first allocated register. */
6553 if (regno == AX_REG)
6556 return gen_rtx_REG (mode, regno);
6561 if (cum->float_in_sse < 2)
6564 if (cum->float_in_sse < 1)
6568 /* In 32bit, we pass TImode in xmm registers. */
6575 if (!type || !AGGREGATE_TYPE_P (type))
6577 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6580 warning (0, "SSE vector argument without SSE enabled "
6584 return gen_reg_or_parallel (mode, orig_mode,
6585 cum->sse_regno + FIRST_SSE_REG);
6590 /* OImode shouldn't be used directly. */
6599 if (!type || !AGGREGATE_TYPE_P (type))
6602 return gen_reg_or_parallel (mode, orig_mode,
6603 cum->sse_regno + FIRST_SSE_REG);
6613 if (!type || !AGGREGATE_TYPE_P (type))
6615 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6618 warning (0, "MMX vector argument without MMX enabled "
6622 return gen_reg_or_parallel (mode, orig_mode,
6623 cum->mmx_regno + FIRST_MMX_REG);
6632 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6633 enum machine_mode orig_mode, const_tree type, bool named)
6635 /* Handle a hidden AL argument containing number of registers
6636 for varargs x86-64 functions. */
6637 if (mode == VOIDmode)
6638 return GEN_INT (cum->maybe_vaarg
6639 ? (cum->sse_nregs < 0
6640 ? X86_64_SSE_REGPARM_MAX
/* Unnamed 256bit vector mode parameters are passed on the stack. */
6661 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6663 &x86_64_int_parameter_registers [cum->regno],
6668 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6669 enum machine_mode orig_mode, bool named,
6670 HOST_WIDE_INT bytes)
/* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
   We use the value -2 to specify that the current function call is MS_ABI. */
6676 if (mode == VOIDmode)
6677 return GEN_INT (-2);
6679 /* If we've run out of registers, it goes on the stack. */
6680 if (cum->nregs == 0)
6683 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6685 /* Only floating point modes are passed in anything but integer regs. */
6686 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6689 regno = cum->regno + FIRST_SSE_REG;
6694 /* Unnamed floating parameters are passed in both the
6695 SSE and integer registers. */
6696 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6697 t2 = gen_rtx_REG (mode, regno);
6698 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6699 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6700 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
/* Handle aggregate types passed in registers. */
6704 if (orig_mode == BLKmode)
6706 if (bytes > 0 && bytes <= 8)
6707 mode = (bytes > 4 ? DImode : SImode);
6708 if (mode == BLKmode)
6712 return gen_reg_or_parallel (mode, orig_mode, regno);
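/* Illustrative MS x64 slot assignment (sketch): each argument consumes
   one positional slot regardless of class, so for

     void f (int a, double b, int c, double d);

   A goes in %ecx (slot 0), B in %xmm1 (slot 1), C in %r8d (slot 2) and
   D in %xmm3 (slot 3); a fifth argument would go on the stack. */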
6715 /* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.
6718 MODE is the argument's machine mode. TYPE is the data type of the
6719 argument. It is null for libcalls where that information may not be
6720 available. CUM gives information about the preceding args and about
6721 the function being called. NAMED is nonzero if this argument is a
6722 named parameter (otherwise it is an extra parameter matching an
6726 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6727 const_tree type, bool named)
6729 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6730 enum machine_mode mode = omode;
6731 HOST_WIDE_INT bytes, words;
6734 if (mode == BLKmode)
6735 bytes = int_size_in_bytes (type);
6737 bytes = GET_MODE_SIZE (mode);
6738 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6740 /* To simplify the code below, represent vector types with a vector mode
6741 even if MMX/SSE are not active. */
6742 if (type && TREE_CODE (type) == VECTOR_TYPE)
6743 mode = type_natural_mode (type, cum);
6745 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6746 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6747 else if (TARGET_64BIT)
6748 arg = function_arg_64 (cum, mode, omode, type, named);
6750 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6752 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6754 /* This argument uses 256bit AVX modes. */
6756 cfun->machine->callee_pass_avx256_p = true;
6758 cfun->machine->caller_pass_avx256_p = true;
6764 /* A C expression that indicates when an argument must be passed by
6765 reference. If nonzero for an argument, a copy of that argument is
6766 made in memory and a pointer to the argument is passed instead of
6767 the argument itself. The pointer is passed in whatever way is
6768 appropriate for passing a pointer to that type. */
6771 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6772 enum machine_mode mode ATTRIBUTE_UNUSED,
6773 const_tree type, bool named ATTRIBUTE_UNUSED)
6775 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6777 /* See Windows x64 Software Convention. */
6778 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6780 int msize = (int) GET_MODE_SIZE (mode);
6783 /* Arrays are passed by reference. */
6784 if (TREE_CODE (type) == ARRAY_TYPE)
6787 if (AGGREGATE_TYPE_P (type))
6789 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6790 are passed by reference. */
6791 msize = int_size_in_bytes (type);
6795 /* __m128 is passed by reference. */
6797 case 1: case 2: case 4: case 8:
6803 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
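/* Examples for the MS ABI rule above (a sketch, declarations not from
   this file):

     struct a { char c[3]; };	// size 3  -> passed by reference
     struct b { long l; };	// size 8  -> passed by value in a GPR
     __m128 v;			// size 16 -> passed by reference */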
6809 /* Return true when TYPE should be 128bit aligned for 32bit argument
6810 passing ABI. XXX: This function is obsolete and is only used for
6811 checking psABI compatibility with previous versions of GCC. */
6814 ix86_compat_aligned_value_p (const_tree type)
6816 enum machine_mode mode = TYPE_MODE (type);
6817 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6821 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6823 if (TYPE_ALIGN (type) < 128)
6826 if (AGGREGATE_TYPE_P (type))
6828 /* Walk the aggregates recursively. */
6829 switch (TREE_CODE (type))
6833 case QUAL_UNION_TYPE:
6837 /* Walk all the structure fields. */
6838 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6840 if (TREE_CODE (field) == FIELD_DECL
6841 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
/* Just for use if some languages pass arrays by value. */
6849 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6860 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6861 XXX: This function is obsolete and is only used for checking psABI
6862 compatibility with previous versions of GCC. */
6865 ix86_compat_function_arg_boundary (enum machine_mode mode,
6866 const_tree type, unsigned int align)
6868 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6869 natural boundaries. */
6870 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
/* The i386 ABI defines all arguments to be 4 byte aligned.  We have to
   make an exception for SSE modes since these require 128bit
   alignment.

   The handling here differs from field_alignment.  ICC aligns MMX
   arguments to 4 byte boundaries, while structure fields are aligned
   to 8 byte boundaries. */
6881 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6882 align = PARM_BOUNDARY;
6886 if (!ix86_compat_aligned_value_p (type))
6887 align = PARM_BOUNDARY;
6890 if (align > BIGGEST_ALIGNMENT)
6891 align = BIGGEST_ALIGNMENT;
/* Return true when TYPE should be 128bit aligned for the 32bit argument
   passing ABI. */
6899 ix86_contains_aligned_value_p (const_tree type)
6901 enum machine_mode mode = TYPE_MODE (type);
6903 if (mode == XFmode || mode == XCmode)
6906 if (TYPE_ALIGN (type) < 128)
6909 if (AGGREGATE_TYPE_P (type))
6911 /* Walk the aggregates recursively. */
6912 switch (TREE_CODE (type))
6916 case QUAL_UNION_TYPE:
6920 /* Walk all the structure fields. */
6921 for (field = TYPE_FIELDS (type);
6923 field = DECL_CHAIN (field))
6925 if (TREE_CODE (field) == FIELD_DECL
6926 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
/* Just for use if some languages pass arrays by value. */
6934 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
6943 return TYPE_ALIGN (type) >= 128;
6948 /* Gives the alignment boundary, in bits, of an argument with the
6949 specified mode and type. */
6952 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
/* Since the main variant type is used for the call, we convert the
   type to its main variant. */
6959 type = TYPE_MAIN_VARIANT (type);
6960 align = TYPE_ALIGN (type);
6963 align = GET_MODE_ALIGNMENT (mode);
6964 if (align < PARM_BOUNDARY)
6965 align = PARM_BOUNDARY;
6969 unsigned int saved_align = align;
6973 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
6976 if (mode == XFmode || mode == XCmode)
6977 align = PARM_BOUNDARY;
6979 else if (!ix86_contains_aligned_value_p (type))
6980 align = PARM_BOUNDARY;
6983 align = PARM_BOUNDARY;
6988 && align != ix86_compat_function_arg_boundary (mode, type,
6992 inform (input_location,
6993 "The ABI for passing parameters with %d-byte"
6994 " alignment has changed in GCC 4.6",
6995 align / BITS_PER_UNIT);
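/* Example (sketch): in 32-bit mode a plain int argument gets
   PARM_BOUNDARY (32-bit) alignment, while an SSE vector such as __m128
   is aligned to 128 bits; the psABI note above fires when the GCC 4.6
   rules and the older ix86_compat_function_arg_boundary disagree for
   the same argument. */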
/* Return true if REGNO is a possible register number of a function value. */
7005 ix86_function_value_regno_p (const unsigned int regno)
7012 case FIRST_FLOAT_REG:
/* TODO: The function should depend on the current function ABI, but
   builtins.c would need updating then.  Therefore we use the
   default ABI here. */
7016 if (TARGET_64BIT && ix86_abi == MS_ABI)
7018 return TARGET_FLOAT_RETURNS_IN_80387;
7024 if (TARGET_MACHO || TARGET_64BIT)
7032 /* Define how to find the value returned by a function.
7033 VALTYPE is the data type of the value (as a tree).
7034 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7035 otherwise, FUNC is 0. */
7038 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7039 const_tree fntype, const_tree fn)
7043 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7044 we normally prevent this case when mmx is not available. However
7045 some ABIs may require the result to be returned like DImode. */
7046 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7047 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7049 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7050 we prevent this case when sse is not available. However some ABIs
7051 may require the result to be returned like integer TImode. */
7052 else if (mode == TImode
7053 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7054 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7056 /* 32-byte vector modes in %ymm0. */
7057 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7058 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7060 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7061 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7062 regno = FIRST_FLOAT_REG;
7064 /* Most things go in %eax. */
7067 /* Override FP return register with %xmm0 for local functions when
7068 SSE math is enabled or for functions with sseregparm attribute. */
7069 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7071 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7072 if ((sse_level >= 1 && mode == SFmode)
7073 || (sse_level == 2 && mode == DFmode))
7074 regno = FIRST_SSE_REG;
7077 /* OImode shouldn't be used directly. */
7078 gcc_assert (mode != OImode);
7080 return gen_rtx_REG (orig_mode, regno);
7084 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7089 /* Handle libcalls, which don't provide a type node. */
7090 if (valtype == NULL)
7102 return gen_rtx_REG (mode, FIRST_SSE_REG);
7105 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7109 return gen_rtx_REG (mode, AX_REG);
7112 else if (POINTER_TYPE_P (valtype))
7114 /* Pointers are always returned in Pmode. */
7118 ret = construct_container (mode, orig_mode, valtype, 1,
7119 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7120 x86_64_int_return_registers, 0);
/* For zero sized structures, construct_container returns NULL, but we
   need to keep the rest of the compiler happy by returning a meaningful
   value. */
7125 ret = gen_rtx_REG (orig_mode, AX_REG);
7131 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7133 unsigned int regno = AX_REG;
7137 switch (GET_MODE_SIZE (mode))
7140 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7141 && !COMPLEX_MODE_P (mode))
7142 regno = FIRST_SSE_REG;
7146 if (mode == SFmode || mode == DFmode)
7147 regno = FIRST_SSE_REG;
7153 return gen_rtx_REG (orig_mode, regno);
7157 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7158 enum machine_mode orig_mode, enum machine_mode mode)
7160 const_tree fn, fntype;
7163 if (fntype_or_decl && DECL_P (fntype_or_decl))
7164 fn = fntype_or_decl;
7165 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7167 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7168 return function_value_ms_64 (orig_mode, mode);
7169 else if (TARGET_64BIT)
7170 return function_value_64 (orig_mode, mode, valtype);
7172 return function_value_32 (orig_mode, mode, fntype, fn);
7176 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7177 bool outgoing ATTRIBUTE_UNUSED)
7179 enum machine_mode mode, orig_mode;
7181 orig_mode = TYPE_MODE (valtype);
7182 mode = type_natural_mode (valtype, NULL);
7183 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7186 /* Pointer function arguments and return values are promoted to Pmode. */
7188 static enum machine_mode
7189 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7190 int *punsignedp, const_tree fntype,
7193 if (type != NULL_TREE && POINTER_TYPE_P (type))
7195 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7198 return default_promote_function_mode (type, mode, punsignedp, fntype,
7203 ix86_libcall_value (enum machine_mode mode)
7205 return ix86_function_value_1 (NULL, NULL, mode, mode);
/* Return true iff TYPE is returned in memory. */
7210 static bool ATTRIBUTE_UNUSED
7211 return_in_memory_32 (const_tree type, enum machine_mode mode)
7215 if (mode == BLKmode)
7218 size = int_size_in_bytes (type);
7220 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7223 if (VECTOR_MODE_P (mode) || mode == TImode)
7225 /* User-created vectors small enough to fit in EAX. */
/* MMX/3dNow values are returned in MM0,
   except when it doesn't exist or the ABI prescribes otherwise. */
7232 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7234 /* SSE values are returned in XMM0, except when it doesn't exist. */
7238 /* AVX values are returned in YMM0, except when it doesn't exist. */
7249 /* OImode shouldn't be used directly. */
7250 gcc_assert (mode != OImode);
7255 static bool ATTRIBUTE_UNUSED
7256 return_in_memory_64 (const_tree type, enum machine_mode mode)
7258 int needed_intregs, needed_sseregs;
7259 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7262 static bool ATTRIBUTE_UNUSED
7263 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7265 HOST_WIDE_INT size = int_size_in_bytes (type);
7267 /* __m128 is returned in xmm0. */
7268 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7269 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
/* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
7273 return size != 1 && size != 2 && size != 4 && size != 8;
7277 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7279 #ifdef SUBTARGET_RETURN_IN_MEMORY
7280 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7282 const enum machine_mode mode = type_natural_mode (type, NULL);
7286 if (ix86_function_type_abi (fntype) == MS_ABI)
7287 return return_in_memory_ms_64 (type, mode);
7289 return return_in_memory_64 (type, mode);
7292 return return_in_memory_32 (type, mode);
7296 /* When returning SSE vector types, we have a choice of either
7297 (1) being abi incompatible with a -march switch, or
7298 (2) generating an error.
7299 Given no good solution, I think the safest thing is one warning.
7300 The user won't be able to use -Werror, but....
7302 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7303 called in response to actually generating a caller or callee that
7304 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7305 via aggregate_value_p for general type probing from tree-ssa. */
7308 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7310 static bool warnedsse, warnedmmx;
7312 if (!TARGET_64BIT && type)
7314 /* Look at the return type of the function, not the function type. */
7315 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7317 if (!TARGET_SSE && !warnedsse)
7320 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7323 warning (0, "SSE vector return without SSE enabled "
7328 if (!TARGET_MMX && !warnedmmx)
7330 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7333 warning (0, "MMX vector return without MMX enabled "
7343 /* Create the va_list data type. */
/* Returns the calling-convention-specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7349 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7351 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
/* For i386 we use a plain pointer to the argument area. */
7354 if (!TARGET_64BIT || abi == MS_ABI)
7355 return build_pointer_type (char_type_node);
7357 record = lang_hooks.types.make_type (RECORD_TYPE);
7358 type_decl = build_decl (BUILTINS_LOCATION,
7359 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7361 f_gpr = build_decl (BUILTINS_LOCATION,
7362 FIELD_DECL, get_identifier ("gp_offset"),
7363 unsigned_type_node);
7364 f_fpr = build_decl (BUILTINS_LOCATION,
7365 FIELD_DECL, get_identifier ("fp_offset"),
7366 unsigned_type_node);
7367 f_ovf = build_decl (BUILTINS_LOCATION,
7368 FIELD_DECL, get_identifier ("overflow_arg_area"),
7370 f_sav = build_decl (BUILTINS_LOCATION,
7371 FIELD_DECL, get_identifier ("reg_save_area"),
7374 va_list_gpr_counter_field = f_gpr;
7375 va_list_fpr_counter_field = f_fpr;
7377 DECL_FIELD_CONTEXT (f_gpr) = record;
7378 DECL_FIELD_CONTEXT (f_fpr) = record;
7379 DECL_FIELD_CONTEXT (f_ovf) = record;
7380 DECL_FIELD_CONTEXT (f_sav) = record;
7382 TYPE_STUB_DECL (record) = type_decl;
7383 TYPE_NAME (record) = type_decl;
7384 TYPE_FIELDS (record) = f_gpr;
7385 DECL_CHAIN (f_gpr) = f_fpr;
7386 DECL_CHAIN (f_fpr) = f_ovf;
7387 DECL_CHAIN (f_ovf) = f_sav;
7389 layout_type (record);
7391 /* The correct type is an array type of one element. */
7392 return build_array_type (record, build_index_type (size_zero_node));
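/* The record built above corresponds to the well-known SysV x86-64
   va_list layout, roughly equivalent to the C declaration

     typedef struct __va_list_tag {
       unsigned int gp_offset;	 // bytes into reg_save_area for GPRs
       unsigned int fp_offset;	 // bytes into reg_save_area for FPRs
       void *overflow_arg_area;	 // next stack argument
       void *reg_save_area;	 // register save area
     } va_list[1];
*/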
/* Set up the builtin va_list data type and, for 64-bit, the additional
   calling-convention-specific va_list data types. */
7399 ix86_build_builtin_va_list (void)
7401 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7403 /* Initialize abi specific va_list builtin types. */
7407 if (ix86_abi == MS_ABI)
7409 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7410 if (TREE_CODE (t) != RECORD_TYPE)
7411 t = build_variant_type_copy (t);
7412 sysv_va_list_type_node = t;
7417 if (TREE_CODE (t) != RECORD_TYPE)
7418 t = build_variant_type_copy (t);
7419 sysv_va_list_type_node = t;
7421 if (ix86_abi != MS_ABI)
7423 t = ix86_build_builtin_va_list_abi (MS_ABI);
7424 if (TREE_CODE (t) != RECORD_TYPE)
7425 t = build_variant_type_copy (t);
7426 ms_va_list_type_node = t;
7431 if (TREE_CODE (t) != RECORD_TYPE)
7432 t = build_variant_type_copy (t);
7433 ms_va_list_type_node = t;
7440 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7443 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7449 /* GPR size of varargs save area. */
7450 if (cfun->va_list_gpr_size)
7451 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7453 ix86_varargs_gpr_size = 0;
7455 /* FPR size of varargs save area. We don't need it if we don't pass
7456 anything in SSE registers. */
7457 if (TARGET_SSE && cfun->va_list_fpr_size)
7458 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7460 ix86_varargs_fpr_size = 0;
7462 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7465 save_area = frame_pointer_rtx;
7466 set = get_varargs_alias_set ();
7468 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7469 if (max > X86_64_REGPARM_MAX)
7470 max = X86_64_REGPARM_MAX;
7472 for (i = cum->regno; i < max; i++)
7474 mem = gen_rtx_MEM (Pmode,
7475 plus_constant (save_area, i * UNITS_PER_WORD));
7476 MEM_NOTRAP_P (mem) = 1;
7477 set_mem_alias_set (mem, set);
7478 emit_move_insn (mem, gen_rtx_REG (Pmode,
7479 x86_64_int_parameter_registers[i]));
7482 if (ix86_varargs_fpr_size)
7484 enum machine_mode smode;
7487 /* Now emit code to save SSE registers. The AX parameter contains number
7488 of SSE parameter registers used to call this function, though all we
7489 actually check here is the zero/non-zero status. */
7491 label = gen_label_rtx ();
7492 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7493 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7496 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7497 we used movdqa (i.e. TImode) instead? Perhaps even better would
7498 be if we could determine the real mode of the data, via a hook
7499 into pass_stdarg. Ignore all that for now. */
7501 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7502 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7504 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7505 if (max > X86_64_SSE_REGPARM_MAX)
7506 max = X86_64_SSE_REGPARM_MAX;
7508 for (i = cum->sse_regno; i < max; ++i)
7510 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7511 mem = gen_rtx_MEM (smode, mem);
7512 MEM_NOTRAP_P (mem) = 1;
7513 set_mem_alias_set (mem, set);
7514 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7516 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7524 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7526 alias_set_type set = get_varargs_alias_set ();
/* Reset to zero, as there might be a sysv vaarg used
   before. */
7531 ix86_varargs_gpr_size = 0;
7532 ix86_varargs_fpr_size = 0;
7534 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7538 mem = gen_rtx_MEM (Pmode,
7539 plus_constant (virtual_incoming_args_rtx,
7540 i * UNITS_PER_WORD));
7541 MEM_NOTRAP_P (mem) = 1;
7542 set_mem_alias_set (mem, set);
7544 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7545 emit_move_insn (mem, reg);
7550 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7551 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7554 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7555 CUMULATIVE_ARGS next_cum;
7558 /* This argument doesn't appear to be used anymore. Which is good,
7559 because the old code here didn't suppress rtl generation. */
7560 gcc_assert (!no_rtl);
7565 fntype = TREE_TYPE (current_function_decl);
7567 /* For varargs, we do not want to skip the dummy va_dcl argument.
7568 For stdargs, we do want to skip the last named argument. */
7570 if (stdarg_p (fntype))
7571 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7574 if (cum->call_abi == MS_ABI)
7575 setup_incoming_varargs_ms_64 (&next_cum);
7577 setup_incoming_varargs_64 (&next_cum);
/* Check if TYPE is a va_list of the plain char * kind. */
7583 is_va_list_char_pointer (tree type)
7587 /* For 32-bit it is always true. */
7590 canonic = ix86_canonical_va_list_type (type);
7591 return (canonic == ms_va_list_type_node
7592 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7595 /* Implement va_start. */
7598 ix86_va_start (tree valist, rtx nextarg)
7600 HOST_WIDE_INT words, n_gpr, n_fpr;
7601 tree f_gpr, f_fpr, f_ovf, f_sav;
7602 tree gpr, fpr, ovf, sav, t;
7606 if (flag_split_stack
7607 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7609 unsigned int scratch_regno;
7611 /* When we are splitting the stack, we can't refer to the stack
7612 arguments using internal_arg_pointer, because they may be on
7613 the old stack. The split stack prologue will arrange to
7614 leave a pointer to the old stack arguments in a scratch
7615 register, which we here copy to a pseudo-register. The split
7616 stack prologue can't set the pseudo-register directly because
7617 it (the prologue) runs before any registers have been saved. */
7619 scratch_regno = split_stack_prologue_scratch_regno ();
7620 if (scratch_regno != INVALID_REGNUM)
7624 reg = gen_reg_rtx (Pmode);
7625 cfun->machine->split_stack_varargs_pointer = reg;
7628 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7632 push_topmost_sequence ();
7633 emit_insn_after (seq, entry_of_function ());
7634 pop_topmost_sequence ();
/* Only the 64bit target needs something special. */
7639 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7641 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7642 std_expand_builtin_va_start (valist, nextarg);
7647 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7648 next = expand_binop (ptr_mode, add_optab,
7649 cfun->machine->split_stack_varargs_pointer,
7650 crtl->args.arg_offset_rtx,
7651 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7652 convert_move (va_r, next, 0);
7657 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7658 f_fpr = DECL_CHAIN (f_gpr);
7659 f_ovf = DECL_CHAIN (f_fpr);
7660 f_sav = DECL_CHAIN (f_ovf);
7662 valist = build_simple_mem_ref (valist);
7663 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7664 /* The following should be folded into the MEM_REF offset. */
7665 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7667 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7669 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7671 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7674 /* Count number of gp and fp argument registers used. */
7675 words = crtl->args.info.words;
7676 n_gpr = crtl->args.info.regno;
7677 n_fpr = crtl->args.info.sse_regno;
7679 if (cfun->va_list_gpr_size)
7681 type = TREE_TYPE (gpr);
7682 t = build2 (MODIFY_EXPR, type,
7683 gpr, build_int_cst (type, n_gpr * 8));
7684 TREE_SIDE_EFFECTS (t) = 1;
7685 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7688 if (TARGET_SSE && cfun->va_list_fpr_size)
7690 type = TREE_TYPE (fpr);
7691 t = build2 (MODIFY_EXPR, type, fpr,
7692 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7693 TREE_SIDE_EFFECTS (t) = 1;
7694 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7697 /* Find the overflow area. */
7698 type = TREE_TYPE (ovf);
7699 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7700 ovf_rtx = crtl->args.internal_arg_pointer;
7702 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7703 t = make_tree (type, ovf_rtx);
7705 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7706 t = build2 (MODIFY_EXPR, type, ovf, t);
7707 TREE_SIDE_EFFECTS (t) = 1;
7708 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7710 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
/* Find the register save area.
   The function prologue saves it right above the stack frame. */
7714 type = TREE_TYPE (sav);
7715 t = make_tree (type, frame_pointer_rtx);
7716 if (!ix86_varargs_gpr_size)
7717 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7718 t = build2 (MODIFY_EXPR, type, sav, t);
7719 TREE_SIDE_EFFECTS (t) = 1;
7720 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
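/* Worked example of the initialization above (sketch): in

     void f (int a, double b, ...)

   one named GPR and one named SSE register are consumed, so va_start
   sets gp_offset = 1 * 8 = 8 and fp_offset = 1 * 16 + 8 * 6 = 64;
   the 8-byte GPR slots occupy the first 48 bytes of the save area and
   the 16-byte SSE slots the rest. */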
7724 /* Implement va_arg. */
7727 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7730 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7731 tree f_gpr, f_fpr, f_ovf, f_sav;
7732 tree gpr, fpr, ovf, sav, t;
7734 tree lab_false, lab_over = NULL_TREE;
7739 enum machine_mode nat_mode;
7740 unsigned int arg_boundary;
/* Only the 64bit target needs something special. */
7743 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7744 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7746 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7747 f_fpr = DECL_CHAIN (f_gpr);
7748 f_ovf = DECL_CHAIN (f_fpr);
7749 f_sav = DECL_CHAIN (f_ovf);
7751 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7752 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7753 valist = build_va_arg_indirect_ref (valist);
7754 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7755 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7756 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7758 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7760 type = build_pointer_type (type);
7761 size = int_size_in_bytes (type);
7762 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7764 nat_mode = type_natural_mode (type, NULL);
/* Unnamed 256bit vector mode parameters are passed on the stack. */
7774 if (!TARGET_64BIT_MS_ABI)
7781 container = construct_container (nat_mode, TYPE_MODE (type),
7782 type, 0, X86_64_REGPARM_MAX,
7783 X86_64_SSE_REGPARM_MAX, intreg,
7788 /* Pull the value out of the saved registers. */
7790 addr = create_tmp_var (ptr_type_node, "addr");
7794 int needed_intregs, needed_sseregs;
7796 tree int_addr, sse_addr;
7798 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7799 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7801 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7803 need_temp = (!REG_P (container)
7804 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7805 || TYPE_ALIGN (type) > 128));
/* In case we are passing a structure, verify that it is a consecutive
   block on the register save area.  If not, we need to do moves. */
7809 if (!need_temp && !REG_P (container))
/* Verify that all registers are strictly consecutive.  */
7812 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7816 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7818 rtx slot = XVECEXP (container, 0, i);
7819 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7820 || INTVAL (XEXP (slot, 1)) != i * 16)
7828 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7830 rtx slot = XVECEXP (container, 0, i);
7831 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7832 || INTVAL (XEXP (slot, 1)) != i * 8)
7844 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7845 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7848 /* First ensure that we fit completely in registers. */
7851 t = build_int_cst (TREE_TYPE (gpr),
7852 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7853 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7854 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7855 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7856 gimplify_and_add (t, pre_p);
7860 t = build_int_cst (TREE_TYPE (fpr),
7861 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7862 + X86_64_REGPARM_MAX * 8);
7863 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7864 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7865 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7866 gimplify_and_add (t, pre_p);
7869 /* Compute index to start of area used for integer regs. */
7872 /* int_addr = gpr + sav; */
7873 t = fold_build_pointer_plus (sav, gpr);
7874 gimplify_assign (int_addr, t, pre_p);
7878 /* sse_addr = fpr + sav; */
7879 t = fold_build_pointer_plus (sav, fpr);
7880 gimplify_assign (sse_addr, t, pre_p);
7884 int i, prev_size = 0;
7885 tree temp = create_tmp_var (type, "va_arg_tmp");
7888 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7889 gimplify_assign (addr, t, pre_p);
7891 for (i = 0; i < XVECLEN (container, 0); i++)
7893 rtx slot = XVECEXP (container, 0, i);
7894 rtx reg = XEXP (slot, 0);
7895 enum machine_mode mode = GET_MODE (reg);
7901 tree dest_addr, dest;
7902 int cur_size = GET_MODE_SIZE (mode);
7904 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7905 prev_size = INTVAL (XEXP (slot, 1));
7906 if (prev_size + cur_size > size)
7908 cur_size = size - prev_size;
7909 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7910 if (mode == BLKmode)
7913 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7914 if (mode == GET_MODE (reg))
7915 addr_type = build_pointer_type (piece_type);
7917 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7919 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7922 if (SSE_REGNO_P (REGNO (reg)))
7924 src_addr = sse_addr;
7925 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7929 src_addr = int_addr;
7930 src_offset = REGNO (reg) * 8;
7932 src_addr = fold_convert (addr_type, src_addr);
7933 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
7935 dest_addr = fold_convert (daddr_type, addr);
7936 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
7937 if (cur_size == GET_MODE_SIZE (mode))
7939 src = build_va_arg_indirect_ref (src_addr);
7940 dest = build_va_arg_indirect_ref (dest_addr);
7942 gimplify_assign (dest, src, pre_p);
7947 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7948 3, dest_addr, src_addr,
7949 size_int (cur_size));
7950 gimplify_and_add (copy, pre_p);
7952 prev_size += cur_size;
7958 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7959 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7960 gimplify_assign (gpr, t, pre_p);
7965 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7966 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7967 gimplify_assign (fpr, t, pre_p);
7970 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7972 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7975 /* ... otherwise out of the overflow area. */
/* When we align a parameter on the stack for the caller, if its
   alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
   aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
   here with the caller. */
7981 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
7982 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7983 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7985 /* Care for on-stack alignment if needed. */
7986 if (arg_boundary <= 64 || size == 0)
7990 HOST_WIDE_INT align = arg_boundary / 8;
7991 t = fold_build_pointer_plus_hwi (ovf, align - 1);
7992 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7993 build_int_cst (TREE_TYPE (t), -align));
7996 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7997 gimplify_assign (addr, t, pre_p);
7999 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8000 gimplify_assign (unshare_expr (ovf), t, pre_p);
8003 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8005 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8006 addr = fold_convert (ptrtype, addr);
8009 addr = build_va_arg_indirect_ref (addr);
8010 return build_va_arg_indirect_ref (addr);
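/* Sketch of the code this emits for va_arg (ap, int) (pseudo-C; field
   names as in the va_list layout above):

     if (ap->gp_offset >= 6 * 8)	// 48: GPR save area exhausted
       addr = ap->overflow_arg_area, ap->overflow_arg_area += 8;
     else
       addr = ap->reg_save_area + ap->gp_offset, ap->gp_offset += 8;
     result = *(int *) addr;

   with the analogous 16-byte-stride test against fp_offset for
   SSE-classed arguments. */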
8013 /* Return true if OPNUM's MEM should be matched
8014 in movabs* patterns. */
8017 ix86_check_movabs (rtx insn, int opnum)
8021 set = PATTERN (insn);
8022 if (GET_CODE (set) == PARALLEL)
8023 set = XVECEXP (set, 0, 0);
8024 gcc_assert (GET_CODE (set) == SET);
8025 mem = XEXP (set, opnum);
8026 while (GET_CODE (mem) == SUBREG)
8027 mem = SUBREG_REG (mem);
8028 gcc_assert (MEM_P (mem));
8029 return volatile_ok || !MEM_VOLATILE_P (mem);
8032 /* Initialize the table of extra 80387 mathematical constants. */
8035 init_ext_80387_constants (void)
8037 static const char * cst[5] =
8039 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8040 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8041 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8042 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8043 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8047 for (i = 0; i < 5; i++)
8049 real_from_string (&ext_80387_constants_table[i], cst[i]);
8050 /* Ensure each constant is rounded to XFmode precision. */
8051 real_convert (&ext_80387_constants_table[i],
8052 XFmode, &ext_80387_constants_table[i]);
8055 ext_80387_constants_init = 1;
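/* Illustrative cross-check (not part of the implementation above): the
   five strings are decimal expansions of log10(2), ln(2), log2(e),
   log2(10) and pi, i.e. the values pushed by fldlg2, fldln2, fldl2e,
   fldl2t and fldpi.  A quick libm comparison, assuming a hosted
   environment where <math.h> provides log2(), is kept under #if 0.  */
#if 0
#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* Same order as the cst[] table above.  */
  printf ("fldlg2 %.17g\n", log10 (2.0));     /* 0.30102999566398120...  */
  printf ("fldln2 %.17g\n", log (2.0));       /* 0.69314718055994531...  */
  printf ("fldl2e %.17g\n", 1.0 / log (2.0)); /* 1.4426950408889634...   */
  printf ("fldl2t %.17g\n", log2 (10.0));     /* 3.3219280948873622...   */
  printf ("fldpi  %.17g\n", acos (-1.0));     /* 3.1415926535897931...   */
  return 0;
}
#endif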
8058 /* Return non-zero if the constant is something that
8059 can be loaded with a special instruction. */
8062 standard_80387_constant_p (rtx x)
8064 enum machine_mode mode = GET_MODE (x);
8068 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8071 if (x == CONST0_RTX (mode))
8073 if (x == CONST1_RTX (mode))
8076 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8078 /* For XFmode constants, try to find a special 80387 instruction when
8079 optimizing for size or on those CPUs that benefit from them. */
8081 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8085 if (! ext_80387_constants_init)
8086 init_ext_80387_constants ();
8088 for (i = 0; i < 5; i++)
8089 if (real_identical (&r, &ext_80387_constants_table[i]))
8093 /* A load of the constant -0.0 or -1.0 will be split into an
8094 fldz;fchs or fld1;fchs sequence. */
8095 if (real_isnegzero (&r))
8097 if (real_identical (&r, &dconstm1))
8103 /* Return the opcode of the special instruction to be used to load
8107 standard_80387_constant_opcode (rtx x)
8109 switch (standard_80387_constant_p (x))
8133 /* Return the CONST_DOUBLE representing the 80387 constant that is
8134 loaded by the specified special instruction. The argument IDX
8135 matches the return value from standard_80387_constant_p. */
8138 standard_80387_constant_rtx (int idx)
8142 if (! ext_80387_constants_init)
8143 init_ext_80387_constants ();
8159 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8163 /* Return 1 if X is all 0s and 2 if X is all 1s
8164 in a supported SSE vector mode. */
8167 standard_sse_constant_p (rtx x)
8169 enum machine_mode mode = GET_MODE (x);
8171 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8173 if (vector_all_ones_operand (x, mode))
8189 /* Return the opcode of the special instruction to be used to load
8193 standard_sse_constant_opcode (rtx insn, rtx x)
8195 switch (standard_sse_constant_p (x))
8198 switch (get_attr_mode (insn))
8201 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8202 return "%vpxor\t%0, %d0";
8204 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8205 return "%vxorpd\t%0, %d0";
8207 return "%vxorps\t%0, %d0";
8210 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8211 return "vpxor\t%x0, %x0, %x0";
8213 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8214 return "vxorpd\t%x0, %x0, %x0";
8216 return "vxorps\t%x0, %x0, %x0";
8223 return "%vpcmpeqd\t%0, %d0";
8231 /* Return true if OP contains a symbol reference. */
8233 symbolic_reference_mentioned_p (rtx op)
8238 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8241 fmt = GET_RTX_FORMAT (GET_CODE (op));
8242 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8248 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8249 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8253 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
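/* Illustrative aside: GET_RTX_FORMAT yields one character per operand
   of an rtx code ('e' for a subexpression, 'E' for a vector of
   subexpressions, plus non-rtx kinds such as 'i' and 's'), which is
   why the walk above only recurses for 'e' and 'E'.  A hypothetical
   generalization of the same walk, parameterized by a predicate, is
   sketched under #if 0.  */
#if 0
static bool
rtx_mentions_p (rtx x, bool (*pred) (rtx))
{
  const char *fmt;
  int i, j;

  if (pred (x))
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    if (fmt[i] == 'E')
      {
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  if (rtx_mentions_p (XVECEXP (x, i, j), pred))
	    return true;
      }
    else if (fmt[i] == 'e' && rtx_mentions_p (XEXP (x, i), pred))
      return true;

  return false;
}
#endif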
8260 /* Return true if it is appropriate to emit `ret' instructions in the
8261 body of a function. Do this only if the epilogue is simple, needing a
8262 couple of insns. Prior to reloading, we can't tell how many registers
8263 must be saved, so return false then. Return false if there is no frame
8264 marker to de-allocate. */
8267 ix86_can_use_return_insn_p (void)
8269 struct ix86_frame frame;
8271 if (! reload_completed || frame_pointer_needed)
8274 /* Don't allow more than 32k pop, since that's all we can do
8275 with one instruction. */
8276 if (crtl->args.pops_args && crtl->args.size >= 32768)
8279 ix86_compute_frame_layout (&frame);
8280 return (frame.stack_pointer_offset == UNITS_PER_WORD
8281 && (frame.nregs + frame.nsseregs) == 0);
8284 /* Value should be nonzero if functions must have frame pointers.
8285 Zero means the frame pointer need not be set up (and parms may
8286 be accessed via the stack pointer) in functions that seem suitable. */
8289 ix86_frame_pointer_required (void)
8291 /* If we accessed previous frames, then the generated code expects
8292 to be able to access the saved ebp value in our frame. */
8293 if (cfun->machine->accesses_prev_frame)
8296 /* Several x86 OSes need a frame pointer for other reasons,
8297 usually pertaining to setjmp. */
8298 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8301 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8302 turns off the frame pointer by default. Turn it back on now if
8303 we've not got a leaf function. */
8304 if (TARGET_OMIT_LEAF_FRAME_POINTER
8305 && (!current_function_is_leaf
8306 || ix86_current_function_calls_tls_descriptor))
8309 if (crtl->profile && !flag_fentry)
8315 /* Record that the current function accesses previous call frames. */
8318 ix86_setup_frame_addresses (void)
8320 cfun->machine->accesses_prev_frame = 1;
8323 #ifndef USE_HIDDEN_LINKONCE
8324 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8325 # define USE_HIDDEN_LINKONCE 1
8327 # define USE_HIDDEN_LINKONCE 0
8331 static int pic_labels_used;
8333 /* Fills in the label name that should be used for a pc thunk for
8334 the given register. */
8337 get_pc_thunk_name (char name[32], unsigned int regno)
8339 gcc_assert (!TARGET_64BIT);
8341 if (USE_HIDDEN_LINKONCE)
8342 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8344 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8348 /* This function generates code for -fpic that loads %ebx with
8349 the return address of the caller and then returns. */
8352 ix86_code_end (void)
8357 for (regno = AX_REG; regno <= SP_REG; regno++)
8362 if (!(pic_labels_used & (1 << regno)))
8365 get_pc_thunk_name (name, regno);
8367 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8368 get_identifier (name),
8369 build_function_type_list (void_type_node, NULL_TREE));
8370 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8371 NULL_TREE, void_type_node);
8372 TREE_PUBLIC (decl) = 1;
8373 TREE_STATIC (decl) = 1;
8378 switch_to_section (darwin_sections[text_coal_section]);
8379 fputs ("\t.weak_definition\t", asm_out_file);
8380 assemble_name (asm_out_file, name);
8381 fputs ("\n\t.private_extern\t", asm_out_file);
8382 assemble_name (asm_out_file, name);
8383 putc ('\n', asm_out_file);
8384 ASM_OUTPUT_LABEL (asm_out_file, name);
8385 DECL_WEAK (decl) = 1;
8389 if (USE_HIDDEN_LINKONCE)
8391 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8393 targetm.asm_out.unique_section (decl, 0);
8394 switch_to_section (get_named_section (decl, NULL, 0));
8396 targetm.asm_out.globalize_label (asm_out_file, name);
8397 fputs ("\t.hidden\t", asm_out_file);
8398 assemble_name (asm_out_file, name);
8399 putc ('\n', asm_out_file);
8400 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8404 switch_to_section (text_section);
8405 ASM_OUTPUT_LABEL (asm_out_file, name);
8408 DECL_INITIAL (decl) = make_node (BLOCK);
8409 current_function_decl = decl;
8410 init_function_start (decl);
8411 first_function_block_is_cold = false;
8412 /* Make sure unwind info is emitted for the thunk if needed. */
8413 final_start_function (emit_barrier (), asm_out_file, 1);
8415 /* Pad stack IP move with 4 instructions (two NOPs count
8416 as one instruction). */
8417 if (TARGET_PAD_SHORT_FUNCTION)
8422 fputs ("\tnop\n", asm_out_file);
8425 xops[0] = gen_rtx_REG (Pmode, regno);
8426 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8427 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8428 fputs ("\tret\n", asm_out_file);
8429 final_end_function ();
8430 init_insn_lengths ();
8431 free_after_compilation (cfun);
8433 current_function_decl = NULL;
8436 if (flag_split_stack)
8437 file_end_indicate_split_stack ();
8440 /* Emit code for the SET_GOT patterns. */
8443 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8449 if (TARGET_VXWORKS_RTP && flag_pic)
8451 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8452 xops[2] = gen_rtx_MEM (Pmode,
8453 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8454 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8456 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8457 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8458 an unadorned address. */
8459 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8460 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8461 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8465 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8469 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8471 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8474 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8475 is what will be referenced by the Mach-O PIC subsystem. */
8477 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8480 targetm.asm_out.internal_label (asm_out_file, "L",
8481 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8486 get_pc_thunk_name (name, REGNO (dest));
8487 pic_labels_used |= 1 << REGNO (dest);
8489 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8490 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8491 output_asm_insn ("call\t%X2", xops);
8492 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8493 is what will be referenced by the Mach-O PIC subsystem. */
8496 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8498 targetm.asm_out.internal_label (asm_out_file, "L",
8499 CODE_LABEL_NUMBER (label));
8504 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
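/* Illustrative aside (typical ia32 -fpic output, modulo assembler
   dialect): the thunk path above usually expands to

       call  __i686.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk emitted by ix86_code_end is simply

   __i686.get_pc_thunk.bx:
       movl  (%esp), %ebx
       ret

   i.e. it copies its own return address, the address of the insn
   following the call, into the PIC register, from which the GOT can
   then be addressed PC-relatively.  */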
8509 /* Generate a "push" pattern for input ARG. */
8514 struct machine_function *m = cfun->machine;
8516 if (m->fs.cfa_reg == stack_pointer_rtx)
8517 m->fs.cfa_offset += UNITS_PER_WORD;
8518 m->fs.sp_offset += UNITS_PER_WORD;
8520 return gen_rtx_SET (VOIDmode,
8522 gen_rtx_PRE_DEC (Pmode,
8523 stack_pointer_rtx)),
8527 /* Generate a "pop" pattern for input ARG. */
8532 return gen_rtx_SET (VOIDmode,
8535 gen_rtx_POST_INC (Pmode,
8536 stack_pointer_rtx)));
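/* Illustrative aside: for 32-bit code, gen_push and gen_pop above
   build the canonical single-insn stack RTL, roughly

       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax))    push %eax
       (set (reg:SI ax) (mem:SI (post_inc:SI (reg:SI sp))))   pop %eax

   where the pre_dec/post_inc side effects let one insn both move the
   data and adjust the stack pointer, which is also why gen_push
   updates the tracked cfa/sp offsets by UNITS_PER_WORD.  */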
8539 /* Return >= 0 if there is an unused call-clobbered register available
8540 for the entire function. */
8543 ix86_select_alt_pic_regnum (void)
8545 if (current_function_is_leaf
8547 && !ix86_current_function_calls_tls_descriptor)
8550 /* Can't use the same register for both PIC and DRAP. */
8552 drap = REGNO (crtl->drap_reg);
8555 for (i = 2; i >= 0; --i)
8556 if (i != drap && !df_regs_ever_live_p (i))
8560 return INVALID_REGNUM;
8563 /* Return TRUE if we need to save REGNO. */
8566 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8568 if (pic_offset_table_rtx
8569 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8570 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8572 || crtl->calls_eh_return
8573 || crtl->uses_const_pool))
8574 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8576 if (crtl->calls_eh_return && maybe_eh_return)
8581 unsigned test = EH_RETURN_DATA_REGNO (i);
8582 if (test == INVALID_REGNUM)
8589 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8592 return (df_regs_ever_live_p (regno)
8593 && !call_used_regs[regno]
8594 && !fixed_regs[regno]
8595 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8598 /* Return the number of saved general purpose registers. */
8601 ix86_nsaved_regs (void)
8606 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8607 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8612 /* Return the number of saved SSE registers. */
8615 ix86_nsaved_sseregs (void)
8620 if (!TARGET_64BIT_MS_ABI)
8622 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8623 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8628 /* Given FROM and TO register numbers, say whether this elimination is
8629 allowed. If stack alignment is needed, we can only replace argument
8630 pointer with hard frame pointer, or replace frame pointer with stack
8631 pointer. Otherwise, frame pointer elimination is automatically
8632 handled and all other eliminations are valid. */
8635 ix86_can_eliminate (const int from, const int to)
8637 if (stack_realign_fp)
8638 return ((from == ARG_POINTER_REGNUM
8639 && to == HARD_FRAME_POINTER_REGNUM)
8640 || (from == FRAME_POINTER_REGNUM
8641 && to == STACK_POINTER_REGNUM));
8643 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8646 /* Return the offset between two registers, one to be eliminated, and the other
8647 its replacement, at the start of a routine. */
8650 ix86_initial_elimination_offset (int from, int to)
8652 struct ix86_frame frame;
8653 ix86_compute_frame_layout (&frame);
8655 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8656 return frame.hard_frame_pointer_offset;
8657 else if (from == FRAME_POINTER_REGNUM
8658 && to == HARD_FRAME_POINTER_REGNUM)
8659 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8662 gcc_assert (to == STACK_POINTER_REGNUM);
8664 if (from == ARG_POINTER_REGNUM)
8665 return frame.stack_pointer_offset;
8667 gcc_assert (from == FRAME_POINTER_REGNUM);
8668 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8672 /* In a dynamically-aligned function, we can't know the offset from
8673 stack pointer to frame pointer, so we must ensure that setjmp
8674 eliminates fp against the hard fp (%ebp) rather than trying to
8675 index from %esp up to the top of the frame across a gap that is
8676 of unknown (at compile-time) size. */
8678 ix86_builtin_setjmp_frame_value (void)
8680 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8683 /* When using -fsplit-stack, the allocation routines set a field in
8684 the TCB to the bottom of the stack plus this much space, measured
8687 #define SPLIT_STACK_AVAILABLE 256
8689 /* Fill the structure ix86_frame with information about the frame of the current function. */
8692 ix86_compute_frame_layout (struct ix86_frame *frame)
8694 unsigned int stack_alignment_needed;
8695 HOST_WIDE_INT offset;
8696 unsigned int preferred_alignment;
8697 HOST_WIDE_INT size = get_frame_size ();
8698 HOST_WIDE_INT to_allocate;
8700 frame->nregs = ix86_nsaved_regs ();
8701 frame->nsseregs = ix86_nsaved_sseregs ();
8703 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8704 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8706 /* The 64-bit MS ABI seems to require stack alignment to always be 16 except
8707 within function prologues and leaf functions. */
8708 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8709 && (!current_function_is_leaf || cfun->calls_alloca != 0
8710 || ix86_current_function_calls_tls_descriptor))
8712 preferred_alignment = 16;
8713 stack_alignment_needed = 16;
8714 crtl->preferred_stack_boundary = 128;
8715 crtl->stack_alignment_needed = 128;
8718 gcc_assert (!size || stack_alignment_needed);
8719 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8720 gcc_assert (preferred_alignment <= stack_alignment_needed);
8722 /* For SEH we have to limit the amount of code movement into the prologue.
8723 At present we do this via a BLOCKAGE, at which point there's very little
8724 scheduling that can be done, which means that there's very little point
8725 in doing anything except PUSHs. */
8727 cfun->machine->use_fast_prologue_epilogue = false;
8729 /* During a reload iteration the number of registers saved can change.
8730 Recompute the value as needed. Do not recompute when the number of registers
8731 didn't change, as reload makes multiple calls to this function and does not
8732 expect the decision to change within a single iteration. */
8733 else if (!optimize_function_for_size_p (cfun)
8734 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8736 int count = frame->nregs;
8737 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8739 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8741 /* The fast prologue uses move instead of push to save registers. This
8742 is significantly longer, but also executes faster as modern hardware
8743 can execute the moves in parallel, but can't do that for push/pop.
8745 Be careful about choosing which prologue to emit: when the function takes
8746 many instructions to execute, we may use the slow version, as well as
8747 when the function is known to be outside a hot spot (this is known with
8748 feedback only). Weight the size of the function by the number of registers
8749 to save, as it is cheap to use one or two push instructions but very
8750 slow to use many of them. */
8752 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8753 if (node->frequency < NODE_FREQUENCY_NORMAL
8754 || (flag_branch_probabilities
8755 && node->frequency < NODE_FREQUENCY_HOT))
8756 cfun->machine->use_fast_prologue_epilogue = false;
8758 cfun->machine->use_fast_prologue_epilogue
8759 = !expensive_function_p (count);
8762 frame->save_regs_using_mov
8763 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
8764 /* If static stack checking is enabled and done with probes,
8765 the registers need to be saved before allocating the frame. */
8766 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
8768 /* Skip return address. */
8769 offset = UNITS_PER_WORD;
8771 /* Skip pushed static chain. */
8772 if (ix86_static_chain_on_stack)
8773 offset += UNITS_PER_WORD;
8775 /* Skip saved base pointer. */
8776 if (frame_pointer_needed)
8777 offset += UNITS_PER_WORD;
8778 frame->hfp_save_offset = offset;
8780 /* The traditional frame pointer location is at the top of the frame. */
8781 frame->hard_frame_pointer_offset = offset;
8783 /* Register save area */
8784 offset += frame->nregs * UNITS_PER_WORD;
8785 frame->reg_save_offset = offset;
8787 /* Align and set SSE register save area. */
8788 if (frame->nsseregs)
8790 /* The only ABI that has saved SSE registers (Win64) also has a
8791 16-byte aligned default stack, and thus we don't need to be
8792 within the re-aligned local stack frame to save them. */
8793 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8794 offset = (offset + 16 - 1) & -16;
8795 offset += frame->nsseregs * 16;
8797 frame->sse_reg_save_offset = offset;
8799 /* The re-aligned stack starts here. Values before this point are not
8800 directly comparable with values below this point. In order to make
8801 sure that no value happens to be the same before and after, force
8802 the alignment computation below to add a non-zero value. */
8803 if (stack_realign_fp)
8804 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8807 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8808 offset += frame->va_arg_size;
8810 /* Align start of frame for local function. */
8811 if (stack_realign_fp
8812 || offset != frame->sse_reg_save_offset
8814 || !current_function_is_leaf
8815 || cfun->calls_alloca
8816 || ix86_current_function_calls_tls_descriptor)
8817 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8819 /* Frame pointer points here. */
8820 frame->frame_pointer_offset = offset;
8824 /* Add the outgoing arguments area. This can be skipped if we eliminated
8825 all the function calls as dead code.
8826 Skipping is however impossible when the function calls alloca, as the
8827 alloca expander assumes that the last crtl->outgoing_args_size bytes
8828 of the stack frame are unused. */
8829 if (ACCUMULATE_OUTGOING_ARGS
8830 && (!current_function_is_leaf || cfun->calls_alloca
8831 || ix86_current_function_calls_tls_descriptor))
8833 offset += crtl->outgoing_args_size;
8834 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8837 frame->outgoing_arguments_size = 0;
8839 /* Align stack boundary. Only needed if we're calling another function
8841 if (!current_function_is_leaf || cfun->calls_alloca
8842 || ix86_current_function_calls_tls_descriptor)
8843 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8845 /* We've reached end of stack frame. */
8846 frame->stack_pointer_offset = offset;
8848 /* Size prologue needs to allocate. */
8849 to_allocate = offset - frame->sse_reg_save_offset;
8851 if ((!to_allocate && frame->nregs <= 1)
8852 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8853 frame->save_regs_using_mov = false;
8855 if (ix86_using_red_zone ()
8856 && current_function_sp_is_unchanging
8857 && current_function_is_leaf
8858 && !ix86_current_function_calls_tls_descriptor)
8860 frame->red_zone_size = to_allocate;
8861 if (frame->save_regs_using_mov)
8862 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8863 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8864 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8867 frame->red_zone_size = 0;
8868 frame->stack_pointer_offset -= frame->red_zone_size;
8870 /* The SEH frame pointer location is near the bottom of the frame.
8871 This is enforced by the fact that the difference between the
8872 stack pointer and the frame pointer is limited to 240 bytes in
8873 the unwind data structure. */
8878 /* If we can leave the frame pointer where it is, do so. */
8879 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
8880 if (diff > 240 || (diff & 15) != 0)
8882 /* Ideally we'd determine what portion of the local stack frame
8883 (within the constraint of the lowest 240) is most heavily used.
8884 But without that complication, simply bias the frame pointer
8885 by 128 bytes so as to maximize the amount of the local stack
8886 frame that is addressable with 8-bit offsets. */
8887 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
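/* Worked example (illustrative numbers, not from the sources): for a
   32-bit non-leaf function with a frame pointer, two call-saved GPRs
   and 40 bytes of locals, with 16-byte preferred alignment:

       offset  4   past the return address (UNITS_PER_WORD)
       offset  8   saved %ebp            -> hard_frame_pointer_offset
       offset 16   %esi and %edi saved   -> reg_save_offset
                   (no SSE saves, so sse_reg_save_offset is also 16)
       offset 16                         -> frame_pointer_offset
       offset 56   after 40 bytes of locals
       offset 64   rounded to the 16-byte preferred boundary
                                         -> stack_pointer_offset

   so the prologue allocates 64 - 16 = 48 bytes (to_allocate above),
   on top of the three pushes already accounted for.  */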
8892 /* This is semi-inlined memory_address_length, but simplified
8893 since we know that we're always dealing with reg+offset, and
8894 to avoid having to create and discard all that rtl. */
8897 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8903 /* EBP and R13 cannot be encoded without an offset. */
8904 len = (regno == BP_REG || regno == R13_REG);
8906 else if (IN_RANGE (offset, -128, 127))
8909 /* ESP and R12 must be encoded with a SIB byte. */
8910 if (regno == SP_REG || regno == R12_REG)
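/* Illustrative examples of the metric above (extra ModRM/SIB and
   displacement bytes, beyond the opcode):

       0(%eax)     -> 0    base register alone
       0(%ebp)     -> 1    %ebp/%r13 still need an explicit disp8 of 0
       8(%eax)     -> 1    disp8
       0(%esp)     -> 1    %esp/%r12 always need a SIB byte
       8(%esp)     -> 2    disp8 + SIB
       1024(%eax)  -> 4    disp32
       1024(%esp)  -> 5    disp32 + SIB

   which is what drives the base-register choice just below.  */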
8916 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8917 The valid base registers are taken from CFUN->MACHINE->FS. */
8920 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8922 const struct machine_function *m = cfun->machine;
8923 rtx base_reg = NULL;
8924 HOST_WIDE_INT base_offset = 0;
8926 if (m->use_fast_prologue_epilogue)
8928 /* Choose the base register most likely to allow the most scheduling
8929 opportunities. Generally FP is valid throughout the function,
8930 while DRAP must be reloaded within the epilogue. But choose either
8931 over the SP due to increased encoding size. */
8935 base_reg = hard_frame_pointer_rtx;
8936 base_offset = m->fs.fp_offset - cfa_offset;
8938 else if (m->fs.drap_valid)
8940 base_reg = crtl->drap_reg;
8941 base_offset = 0 - cfa_offset;
8943 else if (m->fs.sp_valid)
8945 base_reg = stack_pointer_rtx;
8946 base_offset = m->fs.sp_offset - cfa_offset;
8951 HOST_WIDE_INT toffset;
8954 /* Choose the base register with the smallest address encoding.
8955 With a tie, choose FP > DRAP > SP. */
8958 base_reg = stack_pointer_rtx;
8959 base_offset = m->fs.sp_offset - cfa_offset;
8960 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8962 if (m->fs.drap_valid)
8964 toffset = 0 - cfa_offset;
8965 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8968 base_reg = crtl->drap_reg;
8969 base_offset = toffset;
8975 toffset = m->fs.fp_offset - cfa_offset;
8976 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8979 base_reg = hard_frame_pointer_rtx;
8980 base_offset = toffset;
8985 gcc_assert (base_reg != NULL);
8987 return plus_constant (base_reg, base_offset);
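/* Illustrative usage (hedged): continuing the worked layout example
   above, a slot at CFA offset 12 would be addressed as -4(%ebp)
   (fp_offset 8, one displacement byte) rather than 52(%esp)
   (sp_offset 64, disp8 plus SIB, two bytes), so the loop returns the
   %ebp-based address.  */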
8990 /* Emit code to save registers in the prologue. */
8993 ix86_emit_save_regs (void)
8998 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8999 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9001 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9002 RTX_FRAME_RELATED_P (insn) = 1;
9006 /* Emit a single register save at CFA - CFA_OFFSET. */
9009 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9010 HOST_WIDE_INT cfa_offset)
9012 struct machine_function *m = cfun->machine;
9013 rtx reg = gen_rtx_REG (mode, regno);
9014 rtx mem, addr, base, insn;
9016 addr = choose_baseaddr (cfa_offset);
9017 mem = gen_frame_mem (mode, addr);
9019 /* For SSE saves, we need to indicate the 128-bit alignment. */
9020 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9022 insn = emit_move_insn (mem, reg);
9023 RTX_FRAME_RELATED_P (insn) = 1;
9026 if (GET_CODE (base) == PLUS)
9027 base = XEXP (base, 0);
9028 gcc_checking_assert (REG_P (base));
9030 /* When saving registers into a re-aligned local stack frame, avoid
9031 any tricky guessing by dwarf2out. */
9032 if (m->fs.realigned)
9034 gcc_checking_assert (stack_realign_drap);
9036 if (regno == REGNO (crtl->drap_reg))
9038 /* A bit of a hack. We force the DRAP register to be saved in
9039 the re-aligned stack frame, which provides us with a copy
9040 of the CFA that will last past the prologue. Install it. */
9041 gcc_checking_assert (cfun->machine->fs.fp_valid);
9042 addr = plus_constant (hard_frame_pointer_rtx,
9043 cfun->machine->fs.fp_offset - cfa_offset);
9044 mem = gen_rtx_MEM (mode, addr);
9045 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9049 /* The frame pointer is a stable reference within the
9050 aligned frame. Use it. */
9051 gcc_checking_assert (cfun->machine->fs.fp_valid);
9052 addr = plus_constant (hard_frame_pointer_rtx,
9053 cfun->machine->fs.fp_offset - cfa_offset);
9054 mem = gen_rtx_MEM (mode, addr);
9055 add_reg_note (insn, REG_CFA_EXPRESSION,
9056 gen_rtx_SET (VOIDmode, mem, reg));
9060 /* The memory may not be relative to the current CFA register,
9061 which means that we may need to generate a new pattern for
9062 use by the unwind info. */
9063 else if (base != m->fs.cfa_reg)
9065 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9066 mem = gen_rtx_MEM (mode, addr);
9067 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9071 /* Emit code to save registers using MOV insns.
9072 First register is stored at CFA - CFA_OFFSET. */
9074 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9078 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9079 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9081 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9082 cfa_offset -= UNITS_PER_WORD;
9086 /* Emit code to save SSE registers using MOV insns.
9087 First register is stored at CFA - CFA_OFFSET. */
9089 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9093 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9094 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9096 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9101 static GTY(()) rtx queued_cfa_restores;
9103 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9104 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9105 Don't add the note if the previously saved value will be left untouched
9106 within the stack red zone until return, as unwinders can find the same
9107 value in the register and on the stack. */
9110 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9112 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9117 add_reg_note (insn, REG_CFA_RESTORE, reg);
9118 RTX_FRAME_RELATED_P (insn) = 1;
9122 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9125 /* Add queued REG_CFA_RESTORE notes, if any, to INSN. */
9128 ix86_add_queued_cfa_restore_notes (rtx insn)
9131 if (!queued_cfa_restores)
9133 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9135 XEXP (last, 1) = REG_NOTES (insn);
9136 REG_NOTES (insn) = queued_cfa_restores;
9137 queued_cfa_restores = NULL_RTX;
9138 RTX_FRAME_RELATED_P (insn) = 1;
9141 /* Expand a prologue or epilogue stack adjustment.
9142 The pattern exists to put a dependency on all ebp-based memory accesses.
9143 STYLE should be negative if instructions should be marked as frame related,
9144 zero if the %r11 register is live and cannot be freely used, and positive
9148 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9149 int style, bool set_cfa)
9151 struct machine_function *m = cfun->machine;
9153 bool add_frame_related_expr = false;
9156 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9157 else if (x86_64_immediate_operand (offset, DImode))
9158 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9162 /* r11 is used by indirect sibcall return as well, set before the
9163 epilogue and used after the epilogue. */
9165 tmp = gen_rtx_REG (DImode, R11_REG);
9168 gcc_assert (src != hard_frame_pointer_rtx
9169 && dest != hard_frame_pointer_rtx);
9170 tmp = hard_frame_pointer_rtx;
9172 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9174 add_frame_related_expr = true;
9176 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9179 insn = emit_insn (insn);
9181 ix86_add_queued_cfa_restore_notes (insn);
9187 gcc_assert (m->fs.cfa_reg == src);
9188 m->fs.cfa_offset += INTVAL (offset);
9189 m->fs.cfa_reg = dest;
9191 r = gen_rtx_PLUS (Pmode, src, offset);
9192 r = gen_rtx_SET (VOIDmode, dest, r);
9193 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9194 RTX_FRAME_RELATED_P (insn) = 1;
9198 RTX_FRAME_RELATED_P (insn) = 1;
9199 if (add_frame_related_expr)
9201 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9202 r = gen_rtx_SET (VOIDmode, dest, r);
9203 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9207 if (dest == stack_pointer_rtx)
9209 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9210 bool valid = m->fs.sp_valid;
9212 if (src == hard_frame_pointer_rtx)
9214 valid = m->fs.fp_valid;
9215 ooffset = m->fs.fp_offset;
9217 else if (src == crtl->drap_reg)
9219 valid = m->fs.drap_valid;
9224 /* Else there are two possibilities: SP itself, which we set
9225 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9226 taken care of by hand along the eh_return path. */
9227 gcc_checking_assert (src == stack_pointer_rtx
9228 || offset == const0_rtx);
9231 m->fs.sp_offset = ooffset - INTVAL (offset);
9232 m->fs.sp_valid = valid;
9236 /* Find an available register to be used as the dynamic realign argument
9237 pointer register. Such a register will be written in the prologue and
9238 used at the beginning of the body, so it must not be
9239 1. a parameter passing register.
9241 We reuse the static-chain register if it is available. Otherwise, we
9242 use DI for i386 and R13 for x86-64. We chose R13 since it has
9245 Return: the regno of the chosen register. */
9248 find_drap_reg (void)
9250 tree decl = cfun->decl;
9254 /* Use R13 for a nested function or a function that needs a static chain.
9255 Since a function with a tail call may use any caller-saved
9256 register in the epilogue, DRAP must not use a caller-saved
9257 register in that case. */
9258 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9265 /* Use DI for a nested function or a function that needs a static chain.
9266 Since a function with a tail call may use any caller-saved
9267 register in the epilogue, DRAP must not use a caller-saved
9268 register in that case. */
9269 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9272 /* Reuse static chain register if it isn't used for parameter
9274 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9276 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9277 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9284 /* Return minimum incoming stack alignment. */
9287 ix86_minimum_incoming_stack_boundary (bool sibcall)
9289 unsigned int incoming_stack_boundary;
9291 /* Prefer the one specified at command line. */
9292 if (ix86_user_incoming_stack_boundary)
9293 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9294 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9295 if -mstackrealign is used, this isn't used for the sibcall check, and
9296 the estimated stack alignment is 128 bits. */
9299 && ix86_force_align_arg_pointer
9300 && crtl->stack_alignment_estimated == 128)
9301 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9303 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9305 /* Incoming stack alignment can be changed on individual functions
9306 via force_align_arg_pointer attribute. We use the smallest
9307 incoming stack boundary. */
9308 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9309 && lookup_attribute (ix86_force_align_arg_pointer_string,
9310 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9311 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9313 /* The incoming stack frame has to be aligned at least at
9314 parm_stack_boundary. */
9315 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9316 incoming_stack_boundary = crtl->parm_stack_boundary;
9318 /* The stack at the entrance of main is aligned by the runtime. We use
9319 the smallest incoming stack boundary. */
9320 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9321 && DECL_NAME (current_function_decl)
9322 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9323 && DECL_FILE_SCOPE_P (current_function_decl))
9324 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9326 return incoming_stack_boundary;
9329 /* Update incoming stack boundary and estimated stack alignment. */
9332 ix86_update_stack_boundary (void)
9334 ix86_incoming_stack_boundary
9335 = ix86_minimum_incoming_stack_boundary (false);
9337 /* x86_64 varargs needs 16-byte stack alignment for the register save
9341 && crtl->stack_alignment_estimated < 128)
9342 crtl->stack_alignment_estimated = 128;
9345 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9346 needed or an rtx for DRAP otherwise. */
9349 ix86_get_drap_rtx (void)
9351 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9352 crtl->need_drap = true;
9354 if (stack_realign_drap)
9356 /* Assign DRAP to vDRAP and return vDRAP. */
9357 unsigned int regno = find_drap_reg ();
9362 arg_ptr = gen_rtx_REG (Pmode, regno);
9363 crtl->drap_reg = arg_ptr;
9366 drap_vreg = copy_to_reg (arg_ptr);
9370 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9373 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9374 RTX_FRAME_RELATED_P (insn) = 1;
9382 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9385 ix86_internal_arg_pointer (void)
9387 return virtual_incoming_args_rtx;
9390 struct scratch_reg {
9395 /* Return a short-lived scratch register for use on function entry.
9396 In 32-bit mode, it is valid only after the registers are saved
9397 in the prologue. This register must be released by means of
9398 release_scratch_register_on_entry once it is dead. */
9401 get_scratch_register_on_entry (struct scratch_reg *sr)
9409 /* We always use R11 in 64-bit mode. */
9414 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9416 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9417 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9418 int regparm = ix86_function_regparm (fntype, decl);
9420 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9422 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9423 for the static chain register. */
9424 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9425 && drap_regno != AX_REG)
9427 else if (regparm < 2 && drap_regno != DX_REG)
9429 /* ecx is the static chain register. */
9430 else if (regparm < 3 && !fastcall_p && !static_chain_p
9431 && drap_regno != CX_REG)
9433 else if (ix86_save_reg (BX_REG, true))
9435 /* esi is the static chain register. */
9436 else if (!(regparm == 3 && static_chain_p)
9437 && ix86_save_reg (SI_REG, true))
9439 else if (ix86_save_reg (DI_REG, true))
9443 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9448 sr->reg = gen_rtx_REG (Pmode, regno);
9451 rtx insn = emit_insn (gen_push (sr->reg));
9452 RTX_FRAME_RELATED_P (insn) = 1;
9456 /* Release a scratch register obtained from the preceding function. */
9459 release_scratch_register_on_entry (struct scratch_reg *sr)
9463 rtx x, insn = emit_insn (gen_pop (sr->reg));
9465 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9466 RTX_FRAME_RELATED_P (insn) = 1;
9467 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9468 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9469 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9473 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9475 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9478 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9480 /* We skip the probe for the first interval + a small dope of 4 words and
9481 probe that many bytes past the specified size to maintain a protection
9482 area at the bottom of the stack. */
9483 const int dope = 4 * UNITS_PER_WORD;
9484 rtx size_rtx = GEN_INT (size), last;
9486 /* See if we have a constant small number of probes to generate. If so,
9487 that's the easy case. The run-time loop is made up of 11 insns in the
9488 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9489 for n # of intervals. */
9490 if (size <= 5 * PROBE_INTERVAL)
9492 HOST_WIDE_INT i, adjust;
9493 bool first_probe = true;
9495 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9496 values of N from 1 until it exceeds SIZE. If only one probe is
9497 needed, this will not generate any code. Then adjust and probe
9498 to PROBE_INTERVAL + SIZE. */
9499 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9503 adjust = 2 * PROBE_INTERVAL + dope;
9504 first_probe = false;
9507 adjust = PROBE_INTERVAL;
9509 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9510 plus_constant (stack_pointer_rtx, -adjust)));
9511 emit_stack_probe (stack_pointer_rtx);
9515 adjust = size + PROBE_INTERVAL + dope;
9517 adjust = size + PROBE_INTERVAL - i;
9519 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9520 plus_constant (stack_pointer_rtx, -adjust)));
9521 emit_stack_probe (stack_pointer_rtx);
9523 /* Adjust back to account for the additional first interval. */
9524 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9525 plus_constant (stack_pointer_rtx,
9526 PROBE_INTERVAL + dope)));
9529 /* Otherwise, do the same as above, but in a loop. Note that we must be
9530 extra careful with variables wrapping around because we might be at
9531 the very top (or the very bottom) of the address space and we have
9532 to be able to handle this case properly; in particular, we use an
9533 equality test for the loop condition. */
9536 HOST_WIDE_INT rounded_size;
9537 struct scratch_reg sr;
9539 get_scratch_register_on_entry (&sr);
9542 /* Step 1: round SIZE to the previous multiple of the interval. */
9544 rounded_size = size & -PROBE_INTERVAL;
9547 /* Step 2: compute initial and final value of the loop counter. */
9549 /* SP = SP_0 + PROBE_INTERVAL. */
9550 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9551 plus_constant (stack_pointer_rtx,
9552 - (PROBE_INTERVAL + dope))));
9554 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9555 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9556 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9557 gen_rtx_PLUS (Pmode, sr.reg,
9558 stack_pointer_rtx)));
9563 while (SP != LAST_ADDR)
9565 SP = SP + PROBE_INTERVAL
9569 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9570 values of N from 1 until it is equal to ROUNDED_SIZE. */
9572 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9575 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9576 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9578 if (size != rounded_size)
9580 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9581 plus_constant (stack_pointer_rtx,
9582 rounded_size - size)));
9583 emit_stack_probe (stack_pointer_rtx);
9586 /* Adjust back to account for the additional first interval. */
9587 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9588 plus_constant (stack_pointer_rtx,
9589 PROBE_INTERVAL + dope)));
9591 release_scratch_register_on_entry (&sr);
9594 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9596 /* Even if the stack pointer isn't the CFA register, we need to correctly
9597 describe the adjustments made to it, in particular differentiate the
9598 frame-related ones from the frame-unrelated ones. */
9601 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9602 XVECEXP (expr, 0, 0)
9603 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9604 plus_constant (stack_pointer_rtx, -size));
9605 XVECEXP (expr, 0, 1)
9606 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9607 plus_constant (stack_pointer_rtx,
9608 PROBE_INTERVAL + dope + size));
9609 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9610 RTX_FRAME_RELATED_P (last) = 1;
9612 cfun->machine->fs.sp_offset += size;
9615 /* Make sure nothing is scheduled before we are done. */
9616 emit_insn (gen_blockage ());
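/* Illustrative shape (hedged; exact constants depend on PROBE_INTERVAL
   and the 4-word dope) of the constant-size case above:

       sub   $adjust, %sp       adjust covers one interval (+ dope first)
       or    $0, (%sp)          probe the new bottom page
       ...   repeated per interval ...
       add   $interval+dope, %sp    give back the extra first interval

   The "or $0" store is the probe itself: it touches the page so the
   OS commits or grows the stack there, without altering any data.  */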
9619 /* Adjust the stack pointer up to REG while probing it. */
9622 output_adjust_stack_and_probe (rtx reg)
9624 static int labelno = 0;
9625 char loop_lab[32], end_lab[32];
9628 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9629 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9631 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9633 /* Jump to END_LAB if SP == LAST_ADDR. */
9634 xops[0] = stack_pointer_rtx;
9636 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9637 fputs ("\tje\t", asm_out_file);
9638 assemble_name_raw (asm_out_file, end_lab);
9639 fputc ('\n', asm_out_file);
9641 /* SP = SP + PROBE_INTERVAL. */
9642 xops[1] = GEN_INT (PROBE_INTERVAL);
9643 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9646 xops[1] = const0_rtx;
9647 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9649 fprintf (asm_out_file, "\tjmp\t");
9650 assemble_name_raw (asm_out_file, loop_lab);
9651 fputc ('\n', asm_out_file);
9653 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9658 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9659 inclusive. These are offsets from the current stack pointer. */
9662 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9664 /* See if we have a constant small number of probes to generate. If so,
9665 that's the easy case. The run-time loop is made up of 7 insns in the
9666 generic case while the compile-time loop is made up of n insns for n #
9668 if (size <= 7 * PROBE_INTERVAL)
9672 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9673 it exceeds SIZE. If only one probe is needed, this will not
9674 generate any code. Then probe at FIRST + SIZE. */
9675 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9676 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9678 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9681 /* Otherwise, do the same as above, but in a loop. Note that we must be
9682 extra careful with variables wrapping around because we might be at
9683 the very top (or the very bottom) of the address space and we have
9684 to be able to handle this case properly; in particular, we use an
9685 equality test for the loop condition. */
9688 HOST_WIDE_INT rounded_size, last;
9689 struct scratch_reg sr;
9691 get_scratch_register_on_entry (&sr);
9694 /* Step 1: round SIZE to the previous multiple of the interval. */
9696 rounded_size = size & -PROBE_INTERVAL;
9699 /* Step 2: compute initial and final value of the loop counter. */
9701 /* TEST_OFFSET = FIRST. */
9702 emit_move_insn (sr.reg, GEN_INT (-first));
9704 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9705 last = first + rounded_size;
9710 while (TEST_ADDR != LAST_ADDR)
9712 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9716 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9717 until it is equal to ROUNDED_SIZE. */
9719 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9722 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9723 that SIZE is equal to ROUNDED_SIZE. */
9725 if (size != rounded_size)
9726 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9729 rounded_size - size));
9731 release_scratch_register_on_entry (&sr);
9734 /* Make sure nothing is scheduled before we are done. */
9735 emit_insn (gen_blockage ());
9738 /* Probe a range of stack addresses from REG to END, inclusive. These are
9739 offsets from the current stack pointer. */
9742 output_probe_stack_range (rtx reg, rtx end)
9744 static int labelno = 0;
9745 char loop_lab[32], end_lab[32];
9748 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9749 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9751 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9753 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9756 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9757 fputs ("\tje\t", asm_out_file);
9758 assemble_name_raw (asm_out_file, end_lab);
9759 fputc ('\n', asm_out_file);
9761 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9762 xops[1] = GEN_INT (PROBE_INTERVAL);
9763 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9765 /* Probe at TEST_ADDR. */
9766 xops[0] = stack_pointer_rtx;
9768 xops[2] = const0_rtx;
9769 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9771 fprintf (asm_out_file, "\tjmp\t");
9772 assemble_name_raw (asm_out_file, loop_lab);
9773 fputc ('\n', asm_out_file);
9775 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
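/* Illustrative expansion (hedged; AT&T syntax, 32-bit, assuming
   PROBE_INTERVAL == 4096, with the negative test offset in %eax and
   the negated last offset in %edx):

   .LPSRL0:
       cmpl  %edx, %eax          stop when TEST_ADDR == LAST_ADDR
       je    .LPSRE0
       subl  $4096, %eax         advance one interval further down
       orl   $0, (%esp,%eax)     probe at sp + TEST_ADDR
       jmp   .LPSRL0
   .LPSRE0:

   As noted above, equality is used for the loop condition so that
   offsets wrapping around the address space cannot make the loop miss
   its termination point.  */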
9780 /* Finalize the stack_realign_needed flag, which guides the prologue and
9781 epilogue to be generated in the correct form. */
9783 ix86_finalize_stack_realign_flags (void)
9785 /* Check if stack realignment is really needed after reload, and
9786 store the result in cfun. */
9787 unsigned int incoming_stack_boundary
9788 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9789 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9790 unsigned int stack_realign = (incoming_stack_boundary
9791 < (current_function_is_leaf
9792 ? crtl->max_used_stack_slot_alignment
9793 : crtl->stack_alignment_needed));
9795 if (crtl->stack_realign_finalized)
9797 /* After stack_realign_needed is finalized, we can no longer
9799 gcc_assert (crtl->stack_realign_needed == stack_realign);
9803 crtl->stack_realign_needed = stack_realign;
9804 crtl->stack_realign_finalized = true;
9808 /* Expand the prologue into a bunch of separate insns. */
9811 ix86_expand_prologue (void)
9813 struct machine_function *m = cfun->machine;
9816 struct ix86_frame frame;
9817 HOST_WIDE_INT allocate;
9818 bool int_registers_saved;
9820 ix86_finalize_stack_realign_flags ();
9822 /* DRAP should not coexist with stack_realign_fp */
9823 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9825 memset (&m->fs, 0, sizeof (m->fs));
9827 /* Initialize CFA state for before the prologue. */
9828 m->fs.cfa_reg = stack_pointer_rtx;
9829 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9831 /* Track SP offset to the CFA. We continue tracking this after we've
9832 swapped the CFA register away from SP. In the case of re-alignment
9833 this is fudged; we're interested in offsets within the local frame. */
9834 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9835 m->fs.sp_valid = true;
9837 ix86_compute_frame_layout (&frame);
9839 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9841 /* We should have already generated an error for any use of
9842 ms_hook on a nested function. */
9843 gcc_checking_assert (!ix86_static_chain_on_stack);
9845 /* Check if profiling is active and we shall use the profiling-before-
9846 prologue variant. If so, sorry. */
9847 if (crtl->profile && flag_fentry != 0)
9848 sorry ("ms_hook_prologue attribute isn%'t compatible "
9849 "with -mfentry for 32-bit");
9851 /* In ix86_asm_output_function_label we emitted:
9852 8b ff movl.s %edi,%edi
9854 8b ec movl.s %esp,%ebp
9856 This matches the hookable function prologue in Win32 API
9857 functions in Microsoft Windows XP Service Pack 2 and newer.
9858 Wine uses this to enable Windows apps to hook the Win32 API
9859 functions provided by Wine.
9861 What that means is that we've already set up the frame pointer. */
9863 if (frame_pointer_needed
9864 && !(crtl->drap_reg && crtl->stack_realign_needed))
9868 /* We've decided to use the frame pointer already set up.
9869 Describe this to the unwinder by pretending that both
9870 push and mov insns happen right here.
9872 Putting the unwind info here at the end of the ms_hook
9873 is done so that we can make absolutely certain we get
9874 the required byte sequence at the start of the function,
9875 rather than relying on an assembler that can produce
9876 the exact encoding required.
9878 However it does mean (in the unpatched case) that we have
9879 a 1 insn window where the asynchronous unwind info is
9880 incorrect. However, if we placed the unwind info at
9881 its correct location we would have incorrect unwind info
9882 in the patched case. Which is probably all moot since
9883 I don't expect Wine generates dwarf2 unwind info for the
9884 system libraries that use this feature. */
9886 insn = emit_insn (gen_blockage ());
9888 push = gen_push (hard_frame_pointer_rtx);
9889 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9891 RTX_FRAME_RELATED_P (push) = 1;
9892 RTX_FRAME_RELATED_P (mov) = 1;
9894 RTX_FRAME_RELATED_P (insn) = 1;
9895 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9896 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9898 /* Note that gen_push incremented m->fs.cfa_offset, even
9899 though we didn't emit the push insn here. */
9900 m->fs.cfa_reg = hard_frame_pointer_rtx;
9901 m->fs.fp_offset = m->fs.cfa_offset;
9902 m->fs.fp_valid = true;
9906 /* The frame pointer is not needed, so pop %ebp again.
9907 This leaves us with a pristine state. */
9908 emit_insn (gen_pop (hard_frame_pointer_rtx));
9912 /* The first insn of a function that accepts its static chain on the
9913 stack is to push the register that would be filled in by a direct
9914 call. This insn will be skipped by the trampoline. */
9915 else if (ix86_static_chain_on_stack)
9917 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9918 emit_insn (gen_blockage ());
9920 /* We don't want to interpret this push insn as a register save,
9921 only as a stack adjustment. The real copy of the register as
9922 a save will be done later, if needed. */
9923 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9924 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9925 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9926 RTX_FRAME_RELATED_P (insn) = 1;
9929 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9930 DRAP is needed and stack realignment is really needed after reload. */
9931 if (stack_realign_drap)
9933 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9935 /* Only need to push parameter pointer reg if it is caller saved. */
9936 if (!call_used_regs[REGNO (crtl->drap_reg)])
9938 /* Push arg pointer reg */
9939 insn = emit_insn (gen_push (crtl->drap_reg));
9940 RTX_FRAME_RELATED_P (insn) = 1;
9943 /* Grab the argument pointer. */
9944 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9945 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9946 RTX_FRAME_RELATED_P (insn) = 1;
9947 m->fs.cfa_reg = crtl->drap_reg;
9948 m->fs.cfa_offset = 0;
9950 /* Align the stack. */
9951 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9953 GEN_INT (-align_bytes)));
9954 RTX_FRAME_RELATED_P (insn) = 1;
9956 /* Replicate the return address on the stack so that the return
9957 address can be reached via the (argp - 1) slot. This is needed
9958 to implement the macro RETURN_ADDR_RTX and the intrinsic function
9959 expand_builtin_return_addr, etc. */
9960 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9961 t = gen_frame_mem (Pmode, t);
9962 insn = emit_insn (gen_push (t));
9963 RTX_FRAME_RELATED_P (insn) = 1;
9965 /* For the purposes of frame and register save area addressing,
9966 we've started over with a new frame. */
9967 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9968 m->fs.realigned = true;
9971 if (frame_pointer_needed && !m->fs.fp_valid)
9973 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9974 slower on all targets. Also sdb doesn't like it. */
9975 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9976 RTX_FRAME_RELATED_P (insn) = 1;
9978 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
9980 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9981 RTX_FRAME_RELATED_P (insn) = 1;
9983 if (m->fs.cfa_reg == stack_pointer_rtx)
9984 m->fs.cfa_reg = hard_frame_pointer_rtx;
9985 m->fs.fp_offset = m->fs.sp_offset;
9986 m->fs.fp_valid = true;
9990 int_registers_saved = (frame.nregs == 0);
9992 if (!int_registers_saved)
9994 /* If saving registers via PUSH, do so now. */
9995 if (!frame.save_regs_using_mov)
9997 ix86_emit_save_regs ();
9998 int_registers_saved = true;
9999 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10002 /* When using the red zone we may start saving registers before allocating
10003 the stack frame, saving one cycle of the prologue. However, avoid
10004 doing this if we have to probe the stack; at least on x86_64 the
10005 stack probe can turn into a call that clobbers a red zone location. */
10006 else if (ix86_using_red_zone ()
10007 && (! TARGET_STACK_PROBE
10008 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10010 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10011 int_registers_saved = true;
10015 if (stack_realign_fp)
10017 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10018 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10020 /* The computation of the size of the re-aligned stack frame means
10021 that we must allocate the size of the register save area before
10022 performing the actual alignment. Otherwise we cannot guarantee
10023 that there's enough storage above the realignment point. */
10024 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10025 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10026 GEN_INT (m->fs.sp_offset
10027 - frame.sse_reg_save_offset),
10030 /* Align the stack. */
10031 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10033 GEN_INT (-align_bytes)));
10035 /* For the purposes of register save area addressing, the stack
10036 pointer is no longer valid. As for the value of sp_offset,
10037 see ix86_compute_frame_layout, which we need to match in order
10038 to pass verification of stack_pointer_offset at the end. */
10039 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10040 m->fs.sp_valid = false;
10043 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10045 if (flag_stack_usage_info)
10047 /* We start to count from ARG_POINTER. */
10048 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10050 /* If it was realigned, take into account the fake frame. */
10051 if (stack_realign_drap)
10053 if (ix86_static_chain_on_stack)
10054 stack_size += UNITS_PER_WORD;
10056 if (!call_used_regs[REGNO (crtl->drap_reg)])
10057 stack_size += UNITS_PER_WORD;
10059 /* This over-estimates by 1 minimal-stack-alignment-unit but
10060 mitigates that by counting in the new return address slot. */
10061 current_function_dynamic_stack_size
10062 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10065 current_function_static_stack_size = stack_size;
10068 /* The stack has already been decremented by the instruction calling us
10069 so probe if the size is non-negative to preserve the protection area. */
10070 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10072 /* We expect the registers to be saved when probes are used. */
10073 gcc_assert (int_registers_saved);
10075 if (STACK_CHECK_MOVING_SP)
10077 ix86_adjust_stack_and_probe (allocate);
10082 HOST_WIDE_INT size = allocate;
10084 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10085 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10087 if (TARGET_STACK_PROBE)
10088 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10090 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10096 else if (!ix86_target_stack_probe ()
10097 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10099 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10100 GEN_INT (-allocate), -1,
10101 m->fs.cfa_reg == stack_pointer_rtx);
10105 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10107 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10109 bool eax_live = false;
10110 bool r10_live = false;
10113 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10114 if (!TARGET_64BIT_MS_ABI)
10115 eax_live = ix86_eax_live_at_start_p ();
10119 emit_insn (gen_push (eax));
10120 allocate -= UNITS_PER_WORD;
10124 r10 = gen_rtx_REG (Pmode, R10_REG);
10125 emit_insn (gen_push (r10));
10126 allocate -= UNITS_PER_WORD;
10129 emit_move_insn (eax, GEN_INT (allocate));
10130 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10132 /* Use the fact that AX still contains ALLOCATE. */
10133 adjust_stack_insn = (TARGET_64BIT
10134 ? gen_pro_epilogue_adjust_stack_di_sub
10135 : gen_pro_epilogue_adjust_stack_si_sub);
10137 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10138 stack_pointer_rtx, eax));
10140 /* Note that SEH directives need to continue tracking the stack
10141 pointer even after the frame pointer has been set up. */
10142 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10144 if (m->fs.cfa_reg == stack_pointer_rtx)
10145 m->fs.cfa_offset += allocate;
10147 RTX_FRAME_RELATED_P (insn) = 1;
10148 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10149 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10150 plus_constant (stack_pointer_rtx,
10153 m->fs.sp_offset += allocate;
10155 if (r10_live && eax_live)
10157 t = choose_baseaddr (m->fs.sp_offset - allocate);
10158 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10159 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10160 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10162 else if (eax_live || r10_live)
10164 t = choose_baseaddr (m->fs.sp_offset - allocate);
10165 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10168 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
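/* Sketch of the worker-based allocation path above when EAX is live at
   function start (illustrative; the worker and its name vary by
   target, e.g. ___chkstk_ms on mingw targets):

       pushq %rax                    # preserve the incoming value
       movl  $ALLOCATE, %eax
       call  <allocate_stack_worker> # probes the new pages
       subq  %rax, %rsp              # AX still contains ALLOCATE
       movq  OFF(%rsp), %rax         # reload the saved value  */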
10170 /* If we haven't already set up the frame pointer, do so now. */
10171 if (frame_pointer_needed && !m->fs.fp_valid)
10173 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10174 GEN_INT (frame.stack_pointer_offset
10175 - frame.hard_frame_pointer_offset));
10176 insn = emit_insn (insn);
10177 RTX_FRAME_RELATED_P (insn) = 1;
10178 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10180 if (m->fs.cfa_reg == stack_pointer_rtx)
10181 m->fs.cfa_reg = hard_frame_pointer_rtx;
10182 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10183 m->fs.fp_valid = true;
10186 if (!int_registers_saved)
10187 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10188 if (frame.nsseregs)
10189 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10191 pic_reg_used = false;
10192 if (pic_offset_table_rtx
10193 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10196 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10198 if (alt_pic_reg_used != INVALID_REGNUM)
10199 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10201 pic_reg_used = true;
10208 if (ix86_cmodel == CM_LARGE_PIC)
10210 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10211 rtx label = gen_label_rtx ();
10212 emit_label (label);
10213 LABEL_PRESERVE_P (label) = 1;
10214 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10215 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10216 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10217 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10218 pic_offset_table_rtx, tmp_reg));
10221 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10225 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10226 RTX_FRAME_RELATED_P (insn) = 1;
10227 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
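/* For 32-bit PIC, gen_set_got conventionally expands to something like
   the following sketch (the pc-thunk name varies between toolchains):

       call  __x86.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   leaving the address of the GOT in the PIC register.  */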
10231 /* In the pic_reg_used case, make sure that the got load isn't deleted
10232 when mcount needs it. Blockage to avoid call movement across mcount
10233 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10234 note when profiling is enabled. */
10235 if (crtl->profile && !flag_fentry && pic_reg_used)
10236 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10238 if (crtl->drap_reg && !crtl->stack_realign_needed)
10240 /* The vDRAP is set up, but after reload it turns out that stack
10241 realignment isn't necessary; emit the prologue to set up the DRAP
10242 without the stack-realignment adjustment. */
10243 t = choose_baseaddr (0);
10244 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10247 /* Prevent instructions from being scheduled into the register save push
10248 sequence when access to the red zone area is done through the frame
10249 pointer. The offset between the frame pointer and the stack pointer is
10250 calculated relative to the value of the stack pointer at the end of the
10251 function prologue, and moving instructions that access the red zone area
10252 via the frame pointer inside the push sequence violates this assumption. */
10253 if (frame_pointer_needed && frame.red_zone_size)
10254 emit_insn (gen_memory_blockage ());
10256 /* Emit cld instruction if stringops are used in the function. */
10257 if (TARGET_CLD && ix86_current_function_needs_cld)
10258 emit_insn (gen_cld ());
10260 /* SEH requires that the prologue end within 256 bytes of the start of
10261 the function. Prevent instruction schedules that would extend that.
10262 Further, prevent alloca modifications to the stack pointer from being
10263 combined with prologue modifications. */
10265 emit_insn (gen_prologue_use (stack_pointer_rtx));
10268 /* Emit code to restore REG using a POP insn. */
10271 ix86_emit_restore_reg_using_pop (rtx reg)
10273 struct machine_function *m = cfun->machine;
10274 rtx insn = emit_insn (gen_pop (reg));
10276 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10277 m->fs.sp_offset -= UNITS_PER_WORD;
10279 if (m->fs.cfa_reg == crtl->drap_reg
10280 && REGNO (reg) == REGNO (crtl->drap_reg))
10282 /* Previously we'd represented the CFA as an expression
10283 like *(%ebp - 8). We've just popped that value from
10284 the stack, which means we need to reset the CFA to
10285 the drap register. This will remain until we restore
10286 the stack pointer. */
10287 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10288 RTX_FRAME_RELATED_P (insn) = 1;
10290 /* This means that the DRAP register is valid for addressing too. */
10291 m->fs.drap_valid = true;
10295 if (m->fs.cfa_reg == stack_pointer_rtx)
10297 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10298 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10299 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10300 RTX_FRAME_RELATED_P (insn) = 1;
10302 m->fs.cfa_offset -= UNITS_PER_WORD;
10305 /* When the frame pointer is the CFA, and we pop it, we are
10306 swapping back to the stack pointer as the CFA. This happens
10307 for stack frames that don't allocate other data, so we assume
10308 the stack pointer is now pointing at the return address, i.e.
10309 the function entry state, which makes the offset 1 word. */
10310 if (reg == hard_frame_pointer_rtx)
10312 m->fs.fp_valid = false;
10313 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10315 m->fs.cfa_reg = stack_pointer_rtx;
10316 m->fs.cfa_offset -= UNITS_PER_WORD;
10318 add_reg_note (insn, REG_CFA_DEF_CFA,
10319 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10320 GEN_INT (m->fs.cfa_offset)));
10321 RTX_FRAME_RELATED_P (insn) = 1;
10326 /* Emit code to restore saved registers using POP insns. */
10329 ix86_emit_restore_regs_using_pop (void)
10331 unsigned int regno;
10333 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10334 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10335 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10338 /* Emit code and notes for the LEAVE instruction. */
10341 ix86_emit_leave (void)
10343 struct machine_function *m = cfun->machine;
10344 rtx insn = emit_insn (ix86_gen_leave ());
10346 ix86_add_queued_cfa_restore_notes (insn);
10348 gcc_assert (m->fs.fp_valid);
10349 m->fs.sp_valid = true;
10350 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10351 m->fs.fp_valid = false;
10353 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10355 m->fs.cfa_reg = stack_pointer_rtx;
10356 m->fs.cfa_offset = m->fs.sp_offset;
10358 add_reg_note (insn, REG_CFA_DEF_CFA,
10359 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10360 RTX_FRAME_RELATED_P (insn) = 1;
10361 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10362 m->fs.fp_offset);
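/* The state updates above mirror the hardware semantics of LEAVE,
   which behaves like the two-instruction sketch

       movq  %rbp, %rsp   # hence sp_offset derived from fp_offset
       popq  %rbp         # hence fp_valid = false

   so afterwards only the stack pointer remains a valid frame base.  */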
10366 /* Emit code to restore saved registers using MOV insns.
10367 First register is restored from CFA - CFA_OFFSET. */
10369 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10370 bool maybe_eh_return)
10372 struct machine_function *m = cfun->machine;
10373 unsigned int regno;
10375 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10376 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10378 rtx reg = gen_rtx_REG (Pmode, regno);
10381 mem = choose_baseaddr (cfa_offset);
10382 mem = gen_frame_mem (Pmode, mem);
10383 insn = emit_move_insn (reg, mem);
10385 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10387 /* Previously we'd represented the CFA as an expression
10388 like *(%ebp - 8). We've just popped that value from
10389 the stack, which means we need to reset the CFA to
10390 the drap register. This will remain until we restore
10391 the stack pointer. */
10392 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10393 RTX_FRAME_RELATED_P (insn) = 1;
10395 /* This means that the DRAP register is valid for addressing. */
10396 m->fs.drap_valid = true;
10399 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10401 cfa_offset -= UNITS_PER_WORD;
10405 /* Emit code to restore saved registers using MOV insns.
10406 First register is restored from CFA - CFA_OFFSET. */
10408 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10409 bool maybe_eh_return)
10411 unsigned int regno;
10413 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10414 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10416 rtx reg = gen_rtx_REG (V4SFmode, regno);
10419 mem = choose_baseaddr (cfa_offset);
10420 mem = gen_rtx_MEM (V4SFmode, mem);
10421 set_mem_align (mem, 128);
10422 emit_move_insn (reg, mem);
10424 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10430 /* Restore function stack, frame, and registers. */
10433 ix86_expand_epilogue (int style)
10435 struct machine_function *m = cfun->machine;
10436 struct machine_frame_state frame_state_save = m->fs;
10437 struct ix86_frame frame;
10438 bool restore_regs_via_mov;
10441 ix86_finalize_stack_realign_flags ();
10442 ix86_compute_frame_layout (&frame);
10444 m->fs.sp_valid = (!frame_pointer_needed
10445 || (current_function_sp_is_unchanging
10446 && !stack_realign_fp));
10447 gcc_assert (!m->fs.sp_valid
10448 || m->fs.sp_offset == frame.stack_pointer_offset);
10450 /* The FP must be valid if the frame pointer is present. */
10451 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10452 gcc_assert (!m->fs.fp_valid
10453 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10455 /* We must have *some* valid pointer to the stack frame. */
10456 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10458 /* The DRAP is never valid at this point. */
10459 gcc_assert (!m->fs.drap_valid);
10461 /* See the comment about red zone and frame
10462 pointer usage in ix86_expand_prologue. */
10463 if (frame_pointer_needed && frame.red_zone_size)
10464 emit_insn (gen_memory_blockage ());
10466 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10467 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10469 /* Determine the CFA offset of the end of the red-zone. */
10470 m->fs.red_zone_offset = 0;
10471 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10473 /* The red-zone begins below the return address. */
10474 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10476 /* When the register save area is in the aligned portion of
10477 the stack, determine the maximum runtime displacement that
10478 matches up with the aligned frame. */
10479 if (stack_realign_drap)
10480 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10484 /* Special care must be taken for the normal return case of a function
10485 using eh_return: the eax and edx registers are marked as saved, but
10486 not restored along this path. Adjust the save location to match. */
10487 if (crtl->calls_eh_return && style != 2)
10488 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10490 /* EH_RETURN requires the use of moves to function properly. */
10491 if (crtl->calls_eh_return)
10492 restore_regs_via_mov = true;
10493 /* SEH requires the use of pops to identify the epilogue. */
10494 else if (TARGET_SEH)
10495 restore_regs_via_mov = false;
10496 /* If we're only restoring one register and sp is not valid then
10497 use a move instruction to restore the register, since it's
10498 less work than reloading sp and popping the register. */
10499 else if (!m->fs.sp_valid && frame.nregs <= 1)
10500 restore_regs_via_mov = true;
10501 else if (TARGET_EPILOGUE_USING_MOVE
10502 && cfun->machine->use_fast_prologue_epilogue
10503 && (frame.nregs > 1
10504 || m->fs.sp_offset != frame.reg_save_offset))
10505 restore_regs_via_mov = true;
10506 else if (frame_pointer_needed
10508 && m->fs.sp_offset != frame.reg_save_offset)
10509 restore_regs_via_mov = true;
10510 else if (frame_pointer_needed
10511 && TARGET_USE_LEAVE
10512 && cfun->machine->use_fast_prologue_epilogue
10513 && frame.nregs == 1)
10514 restore_regs_via_mov = true;
10516 restore_regs_via_mov = false;
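/* Illustrative tradeoff behind the heuristics above:

       popq  %rbx               # compact, but requires %rsp to walk
                                # the save area in order
       movq  -16(%rbp), %rbx    # longer encoding, but the restores
                                # are independent and schedule freely

   which is why mov is preferred when sp is not valid or when the
   fast prologue/epilogue tuning asks for it.  */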
10518 if (restore_regs_via_mov || frame.nsseregs)
10520 /* Ensure that the entire register save area is addressable via
10521 the stack pointer, if we will restore via sp. */
10522 if (TARGET_64BIT
10523 && m->fs.sp_offset > 0x7fffffff
10524 && !(m->fs.fp_valid || m->fs.drap_valid)
10525 && (frame.nsseregs + frame.nregs) != 0)
10527 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10528 GEN_INT (m->fs.sp_offset
10529 - frame.sse_reg_save_offset),
10531 m->fs.cfa_reg == stack_pointer_rtx);
10535 /* If there are any SSE registers to restore, then we have to do it
10536 via moves, since there's obviously no pop for SSE regs. */
10537 if (frame.nsseregs)
10538 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10541 if (restore_regs_via_mov)
10546 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10548 /* eh_return epilogues need %ecx added to the stack pointer. */
10551 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10553 /* Stack realignment doesn't work with eh_return. */
10554 gcc_assert (!stack_realign_drap);
10555 /* Neither do regparm nested functions. */
10556 gcc_assert (!ix86_static_chain_on_stack);
10558 if (frame_pointer_needed)
10560 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10561 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10562 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10564 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10565 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10567 /* Note that we use SA as a temporary CFA, as the return
10568 address is at the proper place relative to it. We
10569 pretend this happens at the FP restore insn because
10570 prior to this insn the FP would be stored at the wrong
10571 offset relative to SA, and after this insn we have no
10572 other reasonable register to use for the CFA. We don't
10573 bother resetting the CFA to the SP for the duration of
10574 the return insn. */
10575 add_reg_note (insn, REG_CFA_DEF_CFA,
10576 plus_constant (sa, UNITS_PER_WORD));
10577 ix86_add_queued_cfa_restore_notes (insn);
10578 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10579 RTX_FRAME_RELATED_P (insn) = 1;
10581 m->fs.cfa_reg = sa;
10582 m->fs.cfa_offset = UNITS_PER_WORD;
10583 m->fs.fp_valid = false;
10585 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10586 const0_rtx, style, false);
10590 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10591 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10592 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10593 ix86_add_queued_cfa_restore_notes (insn);
10595 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10596 if (m->fs.cfa_offset != UNITS_PER_WORD)
10598 m->fs.cfa_offset = UNITS_PER_WORD;
10599 add_reg_note (insn, REG_CFA_DEF_CFA,
10600 plus_constant (stack_pointer_rtx,
10602 RTX_FRAME_RELATED_P (insn) = 1;
10605 m->fs.sp_offset = UNITS_PER_WORD;
10606 m->fs.sp_valid = true;
10611 /* SEH requires that the function end with (1) a stack adjustment
10612 if necessary, (2) a sequence of pops, and (3) a return or
10613 jump instruction. Prevent insns from the function body from
10614 being scheduled into this sequence. */
10617 /* Prevent a catch region from being adjacent to the standard
10618 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
10619 nor several other flags that would be interesting to test are
10620 set up yet. */
10621 if (flag_non_call_exceptions)
10622 emit_insn (gen_nops (const1_rtx));
10624 emit_insn (gen_blockage ());
10627 /* First step is to deallocate the stack frame so that we can
10628 pop the registers. */
10629 if (!m->fs.sp_valid)
10631 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10632 GEN_INT (m->fs.fp_offset
10633 - frame.reg_save_offset),
10636 else if (m->fs.sp_offset != frame.reg_save_offset)
10638 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10639 GEN_INT (m->fs.sp_offset
10640 - frame.reg_save_offset),
10642 m->fs.cfa_reg == stack_pointer_rtx);
10645 ix86_emit_restore_regs_using_pop ();
10648 /* If we used a frame pointer and haven't already got rid of it,
10649 then use it now. */
10650 if (m->fs.fp_valid)
10652 /* If the stack pointer is valid and pointing at the frame
10653 pointer store address, then we only need a pop. */
10654 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10655 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10656 /* Leave results in shorter dependency chains on CPUs that are
10657 able to grok it fast. */
10658 else if (TARGET_USE_LEAVE
10659 || optimize_function_for_size_p (cfun)
10660 || !cfun->machine->use_fast_prologue_epilogue)
10661 ix86_emit_leave ();
10664 pro_epilogue_adjust_stack (stack_pointer_rtx,
10665 hard_frame_pointer_rtx,
10666 const0_rtx, style, !using_drap);
10667 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10673 int param_ptr_offset = UNITS_PER_WORD;
10676 gcc_assert (stack_realign_drap);
10678 if (ix86_static_chain_on_stack)
10679 param_ptr_offset += UNITS_PER_WORD;
10680 if (!call_used_regs[REGNO (crtl->drap_reg)])
10681 param_ptr_offset += UNITS_PER_WORD;
10683 insn = emit_insn (gen_rtx_SET
10684 (VOIDmode, stack_pointer_rtx,
10685 gen_rtx_PLUS (Pmode,
10687 GEN_INT (-param_ptr_offset))));
10688 m->fs.cfa_reg = stack_pointer_rtx;
10689 m->fs.cfa_offset = param_ptr_offset;
10690 m->fs.sp_offset = param_ptr_offset;
10691 m->fs.realigned = false;
10693 add_reg_note (insn, REG_CFA_DEF_CFA,
10694 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10695 GEN_INT (param_ptr_offset)));
10696 RTX_FRAME_RELATED_P (insn) = 1;
10698 if (!call_used_regs[REGNO (crtl->drap_reg)])
10699 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10702 /* At this point the stack pointer must be valid, and we must have
10703 restored all of the registers. We may not have deallocated the
10704 entire stack frame. We've delayed this until now because it may
10705 be possible to merge the local stack deallocation with the
10706 deallocation forced by ix86_static_chain_on_stack. */
10707 gcc_assert (m->fs.sp_valid);
10708 gcc_assert (!m->fs.fp_valid);
10709 gcc_assert (!m->fs.realigned);
10710 if (m->fs.sp_offset != UNITS_PER_WORD)
10712 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10713 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10717 /* Sibcall epilogues don't want a return instruction. */
10720 m->fs = frame_state_save;
10724 /* Emit vzeroupper if needed. */
10725 if (TARGET_VZEROUPPER
10726 && !TREE_THIS_VOLATILE (cfun->decl)
10727 && !cfun->machine->caller_return_avx256_p)
10728 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10730 if (crtl->args.pops_args && crtl->args.size)
10732 rtx popc = GEN_INT (crtl->args.pops_args);
10734 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10735 address, do explicit add, and jump indirectly to the caller. */
10737 if (crtl->args.pops_args >= 65536)
10739 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10742 /* There is no "pascal" calling convention in any 64bit ABI. */
10743 gcc_assert (!TARGET_64BIT);
10745 insn = emit_insn (gen_pop (ecx));
10746 m->fs.cfa_offset -= UNITS_PER_WORD;
10747 m->fs.sp_offset -= UNITS_PER_WORD;
10749 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10750 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10751 add_reg_note (insn, REG_CFA_REGISTER,
10752 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10753 RTX_FRAME_RELATED_P (insn) = 1;
10755 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10757 emit_jump_insn (gen_return_indirect_internal (ecx));
10760 emit_jump_insn (gen_return_pop_internal (popc));
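/* The 64K threshold exists because the immediate form of RET takes a
   16-bit operand.  Sketch of the two return shapes emitted here:

       ret   $36                # pops_args < 65536

       popl  %ecx               # pops_args >= 65536:
       addl  $70000, %esp       # pop return address, adjust by hand,
       jmp   *%ecx              # and return indirectly  */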
10763 emit_jump_insn (gen_return_internal ());
10765 /* Restore the state back to the state from the prologue,
10766 so that it's correct for the next epilogue. */
10767 m->fs = frame_state_save;
10770 /* Reset from the function's potential modifications. */
10773 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10774 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10776 if (pic_offset_table_rtx)
10777 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10779 /* Mach-O doesn't support labels at the end of objects, so if
10780 it looks like we might want one, insert a NOP. */
10782 rtx insn = get_last_insn ();
10785 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10786 insn = PREV_INSN (insn);
10790 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10791 fputs ("\tnop\n", file);
10797 /* Return a scratch register to use in the split stack prologue. The
10798 split-stack prologue is used for -fsplit-stack. It consists of the first
10799 instructions in the function, even before the regular prologue.
10800 The scratch register can be any caller-saved register which is not
10801 used for parameters or for the static chain. */
10803 static unsigned int
10804 split_stack_prologue_scratch_regno (void)
10813 is_fastcall = (lookup_attribute ("fastcall",
10814 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10816 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10820 if (DECL_STATIC_CHAIN (cfun->decl))
10822 sorry ("-fsplit-stack does not support fastcall with "
10823 "nested function");
10824 return INVALID_REGNUM;
10828 else if (regparm < 3)
10830 if (!DECL_STATIC_CHAIN (cfun->decl))
10836 sorry ("-fsplit-stack does not support 2 register "
10837 "parameters for a nested function");
10838 return INVALID_REGNUM;
10845 /* FIXME: We could make this work by pushing a register
10846 around the addition and comparison. */
10847 sorry ("-fsplit-stack does not support 3 register parameters");
10848 return INVALID_REGNUM;
10853 /* A SYMBOL_REF for the function which allocates new stack space for
10854 -fsplit-stack. */
10856 static GTY(()) rtx split_stack_fn;
10858 /* A SYMBOL_REF for the more-stack function when using the large
10859 model. */
10861 static GTY(()) rtx split_stack_fn_large;
10863 /* Handle -fsplit-stack. These are the first instructions in the
10864 function, even before the regular prologue. */
10867 ix86_expand_split_stack_prologue (void)
10869 struct ix86_frame frame;
10870 HOST_WIDE_INT allocate;
10871 unsigned HOST_WIDE_INT args_size;
10872 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10873 rtx scratch_reg = NULL_RTX;
10874 rtx varargs_label = NULL_RTX;
10877 gcc_assert (flag_split_stack && reload_completed);
10879 ix86_finalize_stack_realign_flags ();
10880 ix86_compute_frame_layout (&frame);
10881 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10883 /* This is the label we will branch to if we have enough stack
10884 space. We expect the basic block reordering pass to reverse this
10885 branch if optimizing, so that we branch in the unlikely case. */
10886 label = gen_label_rtx ();
10888 /* We need to compare the stack pointer minus the frame size with
10889 the stack boundary in the TCB. The stack boundary always gives
10890 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10891 can compare directly. Otherwise we need to do an addition. */
10893 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10894 UNSPEC_STACK_CHECK);
10895 limit = gen_rtx_CONST (Pmode, limit);
10896 limit = gen_rtx_MEM (Pmode, limit);
10897 if (allocate < SPLIT_STACK_AVAILABLE)
10898 current = stack_pointer_rtx;
10901 unsigned int scratch_regno;
10904 /* We need a scratch register to hold the stack pointer minus
10905 the required frame size. Since this is the very start of the
10906 function, the scratch register can be any caller-saved
10907 register which is not used for parameters. */
10908 offset = GEN_INT (- allocate);
10909 scratch_regno = split_stack_prologue_scratch_regno ();
10910 if (scratch_regno == INVALID_REGNUM)
10912 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10913 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10915 /* We don't use ix86_gen_add3 in this case because it will
10916 want to split to lea, but when not optimizing the insn
10917 will not be split after this point. */
10918 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10919 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10924 emit_move_insn (scratch_reg, offset);
10925 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10926 stack_pointer_rtx));
10928 current = scratch_reg;
10931 ix86_expand_branch (GEU, current, limit, label);
10932 jump_insn = get_last_insn ();
10933 JUMP_LABEL (jump_insn) = label;
10935 /* Mark the jump as very likely to be taken. */
10936 add_reg_note (jump_insn, REG_BR_PROB,
10937 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
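/* A sketch of the test emitted above on a glibc target; the TCB slot
   that UNSPEC_STACK_CHECK resolves to is target-defined, and the %fs
   segment and offset here are illustrative only:

       leaq  -FRAME(%rsp), %r11        # only if FRAME is large
       cmpq  %fs:0x70, %r11
       jae   .Lhave_enough_stack  */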
10939 if (split_stack_fn == NULL_RTX)
10940 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10941 fn = split_stack_fn;
10943 /* Get more stack space. We pass in the desired stack space and the
10944 size of the arguments to copy to the new stack. In 32-bit mode
10945 we push the parameters; __morestack will return on a new stack
10946 anyhow. In 64-bit mode we pass the parameters in r10 and
10947 r11. */
10948 allocate_rtx = GEN_INT (allocate);
10949 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10950 call_fusage = NULL_RTX;
10955 reg10 = gen_rtx_REG (Pmode, R10_REG);
10956 reg11 = gen_rtx_REG (Pmode, R11_REG);
10958 /* If this function uses a static chain, it will be in %r10.
10959 Preserve it across the call to __morestack. */
10960 if (DECL_STATIC_CHAIN (cfun->decl))
10964 rax = gen_rtx_REG (Pmode, AX_REG);
10965 emit_move_insn (rax, reg10);
10966 use_reg (&call_fusage, rax);
10969 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10971 HOST_WIDE_INT argval;
10973 /* When using the large model we need to load the address
10974 into a register, and we've run out of registers. So we
10975 switch to a different calling convention, and we call a
10976 different function: __morestack_large_model. We pass the
10977 argument size in the upper 32 bits of r10 and pass the
10978 frame size in the lower 32 bits. */
10979 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
10980 gcc_assert ((args_size & 0xffffffff) == args_size);
10982 if (split_stack_fn_large == NULL_RTX)
10983 split_stack_fn_large =
10984 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10986 if (ix86_cmodel == CM_LARGE_PIC)
10990 label = gen_label_rtx ();
10991 emit_label (label);
10992 LABEL_PRESERVE_P (label) = 1;
10993 emit_insn (gen_set_rip_rex64 (reg10, label));
10994 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10995 emit_insn (gen_adddi3 (reg10, reg10, reg11));
10996 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
10998 x = gen_rtx_CONST (Pmode, x);
10999 emit_move_insn (reg11, x);
11000 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11001 x = gen_const_mem (Pmode, x);
11002 emit_move_insn (reg11, x);
11005 emit_move_insn (reg11, split_stack_fn_large);
11009 argval = ((args_size << 16) << 16) + allocate;
11010 emit_move_insn (reg10, GEN_INT (argval));
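/* Worked example of the packing above: with args_size == 0x18 and
   allocate == 0x1000, argval becomes 0x0000001800001000, putting the
   argument size in the upper 32 bits of %r10 and the frame size in
   the lower 32 bits, which is the interface __morestack_large_model
   expects.  */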
11014 emit_move_insn (reg10, allocate_rtx);
11015 emit_move_insn (reg11, GEN_INT (args_size));
11016 use_reg (&call_fusage, reg11);
11019 use_reg (&call_fusage, reg10);
11023 emit_insn (gen_push (GEN_INT (args_size)));
11024 emit_insn (gen_push (allocate_rtx));
11026 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11027 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11029 add_function_usage_to (call_insn, call_fusage);
11031 /* In order to make call/return prediction work right, we now need
11032 to execute a return instruction. See
11033 libgcc/config/i386/morestack.S for the details on how this works.
11035 For flow purposes gcc must not see this as a return
11036 instruction--we need control flow to continue at the subsequent
11037 label. Therefore, we use an unspec. */
11038 gcc_assert (crtl->args.pops_args < 65536);
11039 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11041 /* If we are in 64-bit mode and this function uses a static chain,
11042 we saved %r10 in %rax before calling __morestack. */
11043 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11044 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11045 gen_rtx_REG (Pmode, AX_REG));
11047 /* If this function calls va_start, we need to store a pointer to
11048 the arguments on the old stack, because they may not have been
11049 all copied to the new stack. At this point the old stack can be
11050 found at the frame pointer value used by __morestack, because
11051 __morestack has set that up before calling back to us. Here we
11052 store that pointer in a scratch register, and in
11053 ix86_expand_prologue we store the scratch register in a stack
11054 slot. */
11055 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11057 unsigned int scratch_regno;
11061 scratch_regno = split_stack_prologue_scratch_regno ();
11062 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11063 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11065 /* In 64-bit mode the frame pointer points at:
11066 saved frame pointer
11067 return address within this function
11068 return address of caller of this function
11070 So we add three words to get to the stack arguments.
11072 In 32-bit mode the frame pointer points at:
11073 saved frame pointer
11074 return address within this function
11075 first argument to __morestack
11076 second argument to __morestack
11077 return address of caller of this function
11079 So we add five words to get to the stack arguments. */
11081 words = TARGET_64BIT ? 3 : 5;
11082 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11083 gen_rtx_PLUS (Pmode, frame_reg,
11084 GEN_INT (words * UNITS_PER_WORD))));
11086 varargs_label = gen_label_rtx ();
11087 emit_jump_insn (gen_jump (varargs_label));
11088 JUMP_LABEL (get_last_insn ()) = varargs_label;
11093 emit_label (label);
11094 LABEL_NUSES (label) = 1;
11096 /* If this function calls va_start, we now have to set the scratch
11097 register for the case where we do not call __morestack. In this
11098 case we need to set it based on the stack pointer. */
11099 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11101 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11102 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11103 GEN_INT (UNITS_PER_WORD))));
11105 emit_label (varargs_label);
11106 LABEL_NUSES (varargs_label) = 1;
11110 /* We may have to tell the dataflow pass that the split stack prologue
11111 is initializing a scratch register. */
11114 ix86_live_on_entry (bitmap regs)
11116 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11118 gcc_assert (flag_split_stack);
11119 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11123 /* Determine if OP is a suitable SUBREG RTX for an address. */
11126 ix86_address_subreg_operand (rtx op)
11128 enum machine_mode mode;
11133 mode = GET_MODE (op);
11135 if (GET_MODE_CLASS (mode) != MODE_INT)
11138 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11139 failures when the register is one word out of a two word structure. */
11140 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11143 /* Allow only SUBREGs of non-eliminable hard registers. */
11144 return register_no_elim_operand (op, mode);
11147 /* Extract the parts of an RTL expression that is a valid memory address
11148 for an instruction. Return 0 if the structure of the address is
11149 grossly off. Return -1 if the address contains ASHIFT, so it is not
11150 strictly valid, but still used for computing length of lea instruction. */
11153 ix86_decompose_address (rtx addr, struct ix86_address *out)
11155 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11156 rtx base_reg, index_reg;
11157 HOST_WIDE_INT scale = 1;
11158 rtx scale_rtx = NULL_RTX;
11161 enum ix86_address_seg seg = SEG_DEFAULT;
11163 /* Allow zero-extended SImode addresses;
11164 they will be emitted with the addr32 prefix. */
11165 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11167 if (GET_CODE (addr) == ZERO_EXTEND
11168 && GET_MODE (XEXP (addr, 0)) == SImode)
11169 addr = XEXP (addr, 0);
11170 else if (GET_CODE (addr) == AND
11171 && const_32bit_mask (XEXP (addr, 1), DImode))
11173 addr = XEXP (addr, 0);
11175 /* Strip subreg. */
11176 if (GET_CODE (addr) == SUBREG
11177 && GET_MODE (SUBREG_REG (addr)) == SImode)
11178 addr = SUBREG_REG (addr);
11184 else if (GET_CODE (addr) == SUBREG)
11186 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11191 else if (GET_CODE (addr) == PLUS)
11193 rtx addends[4], op;
11201 addends[n++] = XEXP (op, 1);
11204 while (GET_CODE (op) == PLUS);
11209 for (i = n; i >= 0; --i)
11212 switch (GET_CODE (op))
11217 index = XEXP (op, 0);
11218 scale_rtx = XEXP (op, 1);
11224 index = XEXP (op, 0);
11225 tmp = XEXP (op, 1);
11226 if (!CONST_INT_P (tmp))
11228 scale = INTVAL (tmp);
11229 if ((unsigned HOST_WIDE_INT) scale > 3)
11231 scale = 1 << scale;
11235 if (XINT (op, 1) == UNSPEC_TP
11236 && TARGET_TLS_DIRECT_SEG_REFS
11237 && seg == SEG_DEFAULT)
11238 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11244 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11271 else if (GET_CODE (addr) == MULT)
11273 index = XEXP (addr, 0); /* index*scale */
11274 scale_rtx = XEXP (addr, 1);
11276 else if (GET_CODE (addr) == ASHIFT)
11278 /* We're called for lea too, which implements ashift on occasion. */
11279 index = XEXP (addr, 0);
11280 tmp = XEXP (addr, 1);
11281 if (!CONST_INT_P (tmp))
11283 scale = INTVAL (tmp);
11284 if ((unsigned HOST_WIDE_INT) scale > 3)
11286 scale = 1 << scale;
11290 disp = addr; /* displacement */
11296 else if (GET_CODE (index) == SUBREG
11297 && ix86_address_subreg_operand (SUBREG_REG (index)))
11303 /* Extract the integral value of scale. */
11306 if (!CONST_INT_P (scale_rtx))
11308 scale = INTVAL (scale_rtx);
11311 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11312 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11314 /* Avoid useless 0 displacement. */
11315 if (disp == const0_rtx && (base || index))
11318 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11319 if (base_reg && index_reg && scale == 1
11320 && (index_reg == arg_pointer_rtx
11321 || index_reg == frame_pointer_rtx
11322 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11325 tmp = base, base = index, index = tmp;
11326 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11329 /* Special case: %ebp cannot be encoded as a base without a displacement.
11330 Similarly %r13. */
11331 if (!disp
11332 && base_reg
11333 && (base_reg == hard_frame_pointer_rtx
11334 || base_reg == frame_pointer_rtx
11335 || base_reg == arg_pointer_rtx
11336 || (REG_P (base_reg)
11337 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11338 || REGNO (base_reg) == R13_REG))))
11339 disp = const0_rtx;
11341 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11342 Avoid this by transforming to [%esi+0].
11343 Reload calls address legitimization without cfun defined, so we need
11344 to test cfun for being non-NULL. */
11345 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11346 && base_reg && !index_reg && !disp
11347 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11348 disp = const0_rtx;
11350 /* Special case: encode reg+reg instead of reg*2. */
11351 if (!base && index && scale == 2)
11352 base = index, base_reg = index_reg, scale = 1;
11354 /* Special case: scaling cannot be encoded without base or displacement. */
11355 if (!base && !disp && index && scale != 1)
11356 disp = const0_rtx;
11358 out->base = base;
11359 out->index = index;
11360 out->disp = disp;
11361 out->scale = scale;
11362 out->seg = seg;
11364 return retval;
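/* Example of a successful decomposition (a sketch): the address
   16(%esi,%ecx,4), i.e.

       (plus (plus (mult (reg %ecx) (const_int 4)) (reg %esi))
             (const_int 16))

   comes out as base = %esi, index = %ecx, scale = 4, disp = 16,
   seg = SEG_DEFAULT, with a nonzero return value.  */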
11367 /* Return cost of the memory address x.
11368 For i386, it is better to use a complex address than let gcc copy
11369 the address into a reg and make a new pseudo. But not if the address
11370 requires two regs -- that would mean more pseudos with longer
11371 lifetimes. */
11373 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11375 struct ix86_address parts;
11377 int ok = ix86_decompose_address (x, &parts);
11381 if (parts.base && GET_CODE (parts.base) == SUBREG)
11382 parts.base = SUBREG_REG (parts.base);
11383 if (parts.index && GET_CODE (parts.index) == SUBREG)
11384 parts.index = SUBREG_REG (parts.index);
11386 /* Attempt to minimize number of registers in the address. */
11388 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11390 && (!REG_P (parts.index)
11391 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11395 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11397 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11398 && parts.base != parts.index)
11401 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11402 since its predecode logic can't detect the length of instructions
11403 and it degenerates to vector decoding. Increase the cost of such
11404 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11405 to split such addresses or even refuse them at all.
11407 The following addressing modes are affected:
11408 [base+scale*index]
11409 [scale*index+disp]
11410 [base+index]
11412 The first and last case may be avoidable by explicitly coding the zero
11413 in the memory address, but I don't have an AMD-K6 machine handy to
11414 check this theory. */
11417 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11418 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11419 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11425 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
11426 this is used to form addresses to local data when -fPIC is in
11427 effect. */
11430 darwin_local_data_pic (rtx disp)
11432 return (GET_CODE (disp) == UNSPEC
11433 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11436 /* Determine if a given RTX is a valid constant. We already know this
11437 satisfies CONSTANT_P. */
11440 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11442 switch (GET_CODE (x))
11447 if (GET_CODE (x) == PLUS)
11449 if (!CONST_INT_P (XEXP (x, 1)))
11454 if (TARGET_MACHO && darwin_local_data_pic (x))
11457 /* Only some unspecs are valid as "constants". */
11458 if (GET_CODE (x) == UNSPEC)
11459 switch (XINT (x, 1))
11462 case UNSPEC_GOTOFF:
11463 case UNSPEC_PLTOFF:
11464 return TARGET_64BIT;
11466 case UNSPEC_NTPOFF:
11467 x = XVECEXP (x, 0, 0);
11468 return (GET_CODE (x) == SYMBOL_REF
11469 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11470 case UNSPEC_DTPOFF:
11471 x = XVECEXP (x, 0, 0);
11472 return (GET_CODE (x) == SYMBOL_REF
11473 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11478 /* We must have drilled down to a symbol. */
11479 if (GET_CODE (x) == LABEL_REF)
11481 if (GET_CODE (x) != SYMBOL_REF)
11486 /* TLS symbols are never valid. */
11487 if (SYMBOL_REF_TLS_MODEL (x))
11490 /* DLLIMPORT symbols are never valid. */
11491 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11492 && SYMBOL_REF_DLLIMPORT_P (x))
11496 /* mdynamic-no-pic */
11497 if (MACHO_DYNAMIC_NO_PIC_P)
11498 return machopic_symbol_defined_p (x);
11503 if (GET_MODE (x) == TImode
11504 && x != CONST0_RTX (TImode)
11510 if (!standard_sse_constant_p (x))
11517 /* Otherwise we handle everything else in the move patterns. */
11521 /* Determine if it's legal to put X into the constant pool. This
11522 is not possible for the address of thread-local symbols, which
11523 is checked above. */
11526 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11528 /* We can always put integral constants and vectors in memory. */
11529 switch (GET_CODE (x))
11539 return !ix86_legitimate_constant_p (mode, x);
11543 /* Nonzero if the constant value X is a legitimate general operand
11544 when generating PIC code. It is given that flag_pic is on and
11545 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11548 legitimate_pic_operand_p (rtx x)
11552 switch (GET_CODE (x))
11555 inner = XEXP (x, 0);
11556 if (GET_CODE (inner) == PLUS
11557 && CONST_INT_P (XEXP (inner, 1)))
11558 inner = XEXP (inner, 0);
11560 /* Only some unspecs are valid as "constants". */
11561 if (GET_CODE (inner) == UNSPEC)
11562 switch (XINT (inner, 1))
11565 case UNSPEC_GOTOFF:
11566 case UNSPEC_PLTOFF:
11567 return TARGET_64BIT;
11569 x = XVECEXP (inner, 0, 0);
11570 return (GET_CODE (x) == SYMBOL_REF
11571 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11572 case UNSPEC_MACHOPIC_OFFSET:
11573 return legitimate_pic_address_disp_p (x);
11581 return legitimate_pic_address_disp_p (x);
11588 /* Determine if a given CONST RTX is a valid memory displacement
11589 in PIC mode. */
11592 legitimate_pic_address_disp_p (rtx disp)
11596 /* In 64bit mode we can allow direct addresses of symbols and labels
11597 when they are not dynamic symbols. */
11600 rtx op0 = disp, op1;
11602 switch (GET_CODE (disp))
11608 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11610 op0 = XEXP (XEXP (disp, 0), 0);
11611 op1 = XEXP (XEXP (disp, 0), 1);
11612 if (!CONST_INT_P (op1)
11613 || INTVAL (op1) >= 16*1024*1024
11614 || INTVAL (op1) < -16*1024*1024)
11616 if (GET_CODE (op0) == LABEL_REF)
11618 if (GET_CODE (op0) != SYMBOL_REF)
11623 /* TLS references should always be enclosed in UNSPEC. */
11624 if (SYMBOL_REF_TLS_MODEL (op0))
11626 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11627 && ix86_cmodel != CM_LARGE_PIC)
11635 if (GET_CODE (disp) != CONST)
11637 disp = XEXP (disp, 0);
11641 /* It is unsafe to allow PLUS expressions; they would exceed the
11642 allowed displacement range of GOT tables. We should not need
11643 these anyway. */
11643 if (GET_CODE (disp) != UNSPEC
11644 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11645 && XINT (disp, 1) != UNSPEC_GOTOFF
11646 && XINT (disp, 1) != UNSPEC_PCREL
11647 && XINT (disp, 1) != UNSPEC_PLTOFF))
11650 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11651 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11657 if (GET_CODE (disp) == PLUS)
11659 if (!CONST_INT_P (XEXP (disp, 1)))
11661 disp = XEXP (disp, 0);
11665 if (TARGET_MACHO && darwin_local_data_pic (disp))
11668 if (GET_CODE (disp) != UNSPEC)
11671 switch (XINT (disp, 1))
11676 /* We need to check for both symbols and labels because VxWorks loads
11677 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11678 details. */
11679 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11680 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11681 case UNSPEC_GOTOFF:
11682 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11683 While the ABI also specifies a 32bit relocation, we don't produce
11684 it in the small PIC model at all. */
11685 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11686 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11688 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11690 case UNSPEC_GOTTPOFF:
11691 case UNSPEC_GOTNTPOFF:
11692 case UNSPEC_INDNTPOFF:
11695 disp = XVECEXP (disp, 0, 0);
11696 return (GET_CODE (disp) == SYMBOL_REF
11697 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11698 case UNSPEC_NTPOFF:
11699 disp = XVECEXP (disp, 0, 0);
11700 return (GET_CODE (disp) == SYMBOL_REF
11701 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11702 case UNSPEC_DTPOFF:
11703 disp = XVECEXP (disp, 0, 0);
11704 return (GET_CODE (disp) == SYMBOL_REF
11705 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11711 /* Recognizes RTL expressions that are valid memory addresses for an
11712 instruction. The MODE argument is the machine mode for the MEM
11713 expression that wants to use this address.
11715 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11716 convert common non-canonical forms to canonical form so that they will
11717 be recognized. */
11720 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11721 rtx addr, bool strict)
11723 struct ix86_address parts;
11724 rtx base, index, disp;
11725 HOST_WIDE_INT scale;
11727 if (ix86_decompose_address (addr, &parts) <= 0)
11728 /* Decomposition failed. */
11732 index = parts.index;
11734 scale = parts.scale;
11736 /* Validate base register. */
11743 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
11744 reg = SUBREG_REG (base);
11746 /* Base is not a register. */
11749 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
11752 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11753 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11754 /* Base is not valid. */
11758 /* Validate index register. */
11765 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
11766 reg = SUBREG_REG (index);
11768 /* Index is not a register. */
11771 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
11774 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11775 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11776 /* Index is not valid. */
11780 /* Index and base should have the same mode. */
11782 && GET_MODE (base) != GET_MODE (index))
11785 /* Validate scale factor. */
11789 /* Scale without index. */
11792 if (scale != 2 && scale != 4 && scale != 8)
11793 /* Scale is not a valid multiplier. */
11797 /* Validate displacement. */
11800 if (GET_CODE (disp) == CONST
11801 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11802 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11803 switch (XINT (XEXP (disp, 0), 1))
11805 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
11806 when used. While the ABI also specifies 32bit relocations, we
11807 don't produce them at all and use IP-relative addressing instead. */
11809 case UNSPEC_GOTOFF:
11810 gcc_assert (flag_pic);
11812 goto is_legitimate_pic;
11814 /* 64bit address unspec. */
11817 case UNSPEC_GOTPCREL:
11819 gcc_assert (flag_pic);
11820 goto is_legitimate_pic;
11822 case UNSPEC_GOTTPOFF:
11823 case UNSPEC_GOTNTPOFF:
11824 case UNSPEC_INDNTPOFF:
11825 case UNSPEC_NTPOFF:
11826 case UNSPEC_DTPOFF:
11829 case UNSPEC_STACK_CHECK:
11830 gcc_assert (flag_split_stack);
11834 /* Invalid address unspec. */
11838 else if (SYMBOLIC_CONST (disp)
11842 && MACHOPIC_INDIRECT
11843 && !machopic_operand_p (disp)
11849 if (TARGET_64BIT && (index || base))
11851 /* foo@dtpoff(%rX) is ok. */
11852 if (GET_CODE (disp) != CONST
11853 || GET_CODE (XEXP (disp, 0)) != PLUS
11854 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11855 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11856 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11857 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11858 /* Non-constant pic memory reference. */
11861 else if ((!TARGET_MACHO || flag_pic)
11862 && ! legitimate_pic_address_disp_p (disp))
11863 /* Displacement is an invalid pic construct. */
11866 else if (MACHO_DYNAMIC_NO_PIC_P
11867 && !ix86_legitimate_constant_p (Pmode, disp))
11868 /* The displacement must be referenced via a non-lazy pointer. */
11872 /* This code used to verify that a symbolic pic displacement
11873 includes the pic_offset_table_rtx register.
11875 While this is a good idea, unfortunately these constructs may
11876 be created by the "adds using lea" optimization for incorrect
11877 code like:
11879 int a;
11880 int foo (int i)
11881 {
11882 return *(&a + i);
11883 }
11885 This code is nonsensical, but results in addressing the
11886 GOT table with a pic_offset_table_rtx base. We can't
11887 just refuse it easily, since it gets matched by the
11888 "addsi3" pattern, which later gets split to lea in the
11889 case where the output register differs from the input. While
11890 this could be handled by a separate addsi pattern for this case
11891 that never results in lea, disabling this test seems to be the
11892 easier and correct fix for the crash. */
11894 else if (GET_CODE (disp) != LABEL_REF
11895 && !CONST_INT_P (disp)
11896 && (GET_CODE (disp) != CONST
11897 || !ix86_legitimate_constant_p (Pmode, disp))
11898 && (GET_CODE (disp) != SYMBOL_REF
11899 || !ix86_legitimate_constant_p (Pmode, disp)))
11900 /* Displacement is not constant. */
11902 else if (TARGET_64BIT
11903 && !x86_64_immediate_operand (disp, VOIDmode))
11904 /* Displacement is out of range. */
11908 /* Everything looks valid. */
11912 /* Determine if a given RTX is a valid constant address. */
11915 constant_address_p (rtx x)
11917 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11920 /* Return a unique alias set for the GOT. */
11922 static alias_set_type
11923 ix86_GOT_alias_set (void)
11925 static alias_set_type set = -1;
11927 set = new_alias_set ();
11931 /* Return a legitimate reference for ORIG (an address) using the
11932 register REG. If REG is 0, a new pseudo is generated.
11934 There are two types of references that must be handled:
11936 1. Global data references must load the address from the GOT, via
11937 the PIC reg. An insn is emitted to do this load, and the reg is
11940 2. Static data references, constant pool addresses, and code labels
11941 compute the address as an offset from the GOT, whose base is in
11942 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11943 differentiate them from global data objects. The returned
11944 address is the PIC reg + an unspec constant.
11946 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11947 reg also appears in the address. */
11950 legitimize_pic_address (rtx orig, rtx reg)
11953 rtx new_rtx = orig;
11957 if (TARGET_MACHO && !TARGET_64BIT)
11960 reg = gen_reg_rtx (Pmode);
11961 /* Use the generic Mach-O PIC machinery. */
11962 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11966 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11968 else if (TARGET_64BIT
11969 && ix86_cmodel != CM_SMALL_PIC
11970 && gotoff_operand (addr, Pmode))
11973 /* This symbol may be referenced via a displacement from the PIC
11974 base address (@GOTOFF). */
11976 if (reload_in_progress)
11977 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11978 if (GET_CODE (addr) == CONST)
11979 addr = XEXP (addr, 0);
11980 if (GET_CODE (addr) == PLUS)
11982 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11984 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11987 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11988 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11990 tmpreg = gen_reg_rtx (Pmode);
11993 emit_move_insn (tmpreg, new_rtx);
11997 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11998 tmpreg, 1, OPTAB_DIRECT);
12001 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12003 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12005 /* This symbol may be referenced via a displacement from the PIC
12006 base address (@GOTOFF). */
12008 if (reload_in_progress)
12009 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12010 if (GET_CODE (addr) == CONST)
12011 addr = XEXP (addr, 0);
12012 if (GET_CODE (addr) == PLUS)
12014 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12016 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12019 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12020 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12021 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12025 emit_move_insn (reg, new_rtx);
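/* Sketch of the @GOTOFF form this branch produces in 32-bit code: the
   symbol is addressed as a displacement from the GOT base, e.g.

       leal  sym@GOTOFF(%ebx), %eax

   so local data needs no GOT entry load at all.  */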
12029 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12030 /* We can't use @GOTOFF for text labels on VxWorks;
12031 see gotoff_operand. */
12032 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12034 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12036 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12037 return legitimize_dllimport_symbol (addr, true);
12038 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12039 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12040 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12042 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12043 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12047 /* For x64 PE-COFF there is no GOT table, so we use the address
12048 directly. */
12049 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12051 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12052 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12055 reg = gen_reg_rtx (Pmode);
12056 emit_move_insn (reg, new_rtx);
12059 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12061 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12062 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12063 new_rtx = gen_const_mem (Pmode, new_rtx);
12064 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12067 reg = gen_reg_rtx (Pmode);
12068 /* Use gen_movsi directly; otherwise the address is loaded
12069 into a register for CSE. We don't want to CSE these addresses;
12070 instead we CSE addresses from the GOT table, so skip this. */
12071 emit_insn (gen_movsi (reg, new_rtx));
12076 /* This symbol must be referenced via a load from the
12077 Global Offset Table (@GOT). */
12079 if (reload_in_progress)
12080 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12081 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12082 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12084 new_rtx = force_reg (Pmode, new_rtx);
12085 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12086 new_rtx = gen_const_mem (Pmode, new_rtx);
12087 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12090 reg = gen_reg_rtx (Pmode);
12091 emit_move_insn (reg, new_rtx);
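/* Sketch of the two GOT-load shapes built above:

       movq  sym@GOTPCREL(%rip), %rax   # 64-bit, RIP-relative GOT slot
       movl  sym@GOT(%ebx), %eax        # 32-bit, PIC register + @GOT

   either way the value loaded is the symbol's address.  */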
12097 if (CONST_INT_P (addr)
12098 && !x86_64_immediate_operand (addr, VOIDmode))
12102 emit_move_insn (reg, addr);
12106 new_rtx = force_reg (Pmode, addr);
12108 else if (GET_CODE (addr) == CONST)
12110 addr = XEXP (addr, 0);
12112 /* We must match stuff we generate before. Assume the only
12113 unspecs that can get here are ours. Not that we could do
12114 anything with them anyway.... */
12115 if (GET_CODE (addr) == UNSPEC
12116 || (GET_CODE (addr) == PLUS
12117 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12119 gcc_assert (GET_CODE (addr) == PLUS);
12121 if (GET_CODE (addr) == PLUS)
12123 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12125 /* Check first to see if this is a constant offset from a @GOTOFF
12126 symbol reference. */
12127 if (gotoff_operand (op0, Pmode)
12128 && CONST_INT_P (op1))
12132 if (reload_in_progress)
12133 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12134 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12136 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12137 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12138 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12142 emit_move_insn (reg, new_rtx);
12148 if (INTVAL (op1) < -16*1024*1024
12149 || INTVAL (op1) >= 16*1024*1024)
12151 if (!x86_64_immediate_operand (op1, Pmode))
12152 op1 = force_reg (Pmode, op1);
12153 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12159 base = legitimize_pic_address (XEXP (addr, 0), reg);
12160 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12161 base == reg ? NULL_RTX : reg);
12163 if (CONST_INT_P (new_rtx))
12164 new_rtx = plus_constant (base, INTVAL (new_rtx));
12167 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12169 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12170 new_rtx = XEXP (new_rtx, 1);
12172 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12180 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12183 get_thread_pointer (bool to_reg)
12185 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12187 if (GET_MODE (tp) != Pmode)
12188 tp = convert_to_mode (Pmode, tp, 1);
12191 tp = copy_addr_to_reg (tp);
12196 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12198 static GTY(()) rtx ix86_tls_symbol;
12201 ix86_tls_get_addr (void)
12203 if (!ix86_tls_symbol)
12206 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12207 ? "___tls_get_addr" : "__tls_get_addr");
12209 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12212 return ix86_tls_symbol;
12215 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12217 static GTY(()) rtx ix86_tls_module_base_symbol;
12220 ix86_tls_module_base (void)
12222 if (!ix86_tls_module_base_symbol)
12224 ix86_tls_module_base_symbol
12225 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12227 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12228 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12231 return ix86_tls_module_base_symbol;
12234 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12235 false if we expect this to be used for a memory address and true if
12236 we expect to load the address into a register. */
12239 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12241 rtx dest, base, off;
12242 rtx pic = NULL_RTX, tp = NULL_RTX;
12247 case TLS_MODEL_GLOBAL_DYNAMIC:
12248 dest = gen_reg_rtx (Pmode);
12253 pic = pic_offset_table_rtx;
12256 pic = gen_reg_rtx (Pmode);
12257 emit_insn (gen_set_got (pic));
12261 if (TARGET_GNU2_TLS)
12264 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12266 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12268 tp = get_thread_pointer (true);
12269 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12271 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12275 rtx caddr = ix86_tls_get_addr ();
12279 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12282 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12283 insns = get_insns ();
12286 RTL_CONST_CALL_P (insns) = 1;
12287 emit_libcall_block (insns, dest, rax, x);
12290 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
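/* For reference (per the ELF TLS ABI), the ia32 global-dynamic
   sequence emitted here is roughly:

	leal	x@tlsgd(,%ebx,1), %eax
	call	___tls_get_addr@PLT

   leaving the address of `x' in %eax; the 64-bit variant passes the
   argument in %rdi and calls __tls_get_addr, with extra padding bytes
   reserved for linker relaxation.  */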
12294 case TLS_MODEL_LOCAL_DYNAMIC:
12295 base = gen_reg_rtx (Pmode);
12300 pic = pic_offset_table_rtx;
12303 pic = gen_reg_rtx (Pmode);
12304 emit_insn (gen_set_got (pic));
12308 if (TARGET_GNU2_TLS)
12310 rtx tmp = ix86_tls_module_base ();
12313 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12315 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12317 tp = get_thread_pointer (true);
12318 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12319 gen_rtx_MINUS (Pmode, tmp, tp));
12323 rtx caddr = ix86_tls_get_addr ();
12327 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12330 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12331 insns = get_insns ();
12334 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
12335 share the LD_BASE result with other LD model accesses. */
12336 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12337 UNSPEC_TLS_LD_BASE);
12339 RTL_CONST_CALL_P (insns) = 1;
12340 emit_libcall_block (insns, base, rax, eqv);
12343 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12346 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12347 off = gen_rtx_CONST (Pmode, off);
12349 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12351 if (TARGET_GNU2_TLS)
12353 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12355 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
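/* For reference (per the ELF TLS ABI), the ia32 local-dynamic
   sequence is roughly:

	leal	x@tlsldm(%ebx), %eax
	call	___tls_get_addr@PLT
	leal	x@dtpoff(%eax), %edx

   so a single tls_get_addr result (the module base) can serve any
   number of @dtpoff accesses within the function.  */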
12359 case TLS_MODEL_INITIAL_EXEC:
12362 if (TARGET_SUN_TLS)
12364 /* The Sun linker took the AMD64 TLS spec literally
12365 and can only handle %rax as the destination of the
12366 initial executable code sequence. */
12368 dest = gen_reg_rtx (Pmode);
12369 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12374 type = UNSPEC_GOTNTPOFF;
12378 if (reload_in_progress)
12379 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12380 pic = pic_offset_table_rtx;
12381 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12383 else if (!TARGET_ANY_GNU_TLS)
12385 pic = gen_reg_rtx (Pmode);
12386 emit_insn (gen_set_got (pic));
12387 type = UNSPEC_GOTTPOFF;
12392 type = UNSPEC_INDNTPOFF;
12395 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12396 off = gen_rtx_CONST (Pmode, off);
12398 off = gen_rtx_PLUS (Pmode, pic, off);
12399 off = gen_const_mem (Pmode, off);
12400 set_mem_alias_set (off, ix86_GOT_alias_set ());
12402 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12404 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12405 off = force_reg (Pmode, off);
12406 return gen_rtx_PLUS (Pmode, base, off);
12410 base = get_thread_pointer (true);
12411 dest = gen_reg_rtx (Pmode);
12412 emit_insn (gen_subsi3 (dest, base, off));
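/* For reference, with GNU TLS the ia32 initial-exec access is
   roughly:

	movl	%gs:0, %eax
	addl	x@gotntpoff(%ebx), %eax

   whereas the non-GNU path above instead subtracts the GOT-loaded
   offset from the thread pointer via the subsi3 just emitted.  */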
12416 case TLS_MODEL_LOCAL_EXEC:
12417 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12418 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12419 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12420 off = gen_rtx_CONST (Pmode, off);
12422 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12424 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12425 return gen_rtx_PLUS (Pmode, base, off);
12429 base = get_thread_pointer (true);
12430 dest = gen_reg_rtx (Pmode);
12431 emit_insn (gen_subsi3 (dest, base, off));
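/* For reference, local-exec is the cheapest model: with GNU TLS the
   variable can be accessed directly, e.g.

	movl	%gs:x@ntpoff, %eax

   while the non-GNU path above again forms tp - x@tpoff with a
   subtract.  */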
12436 gcc_unreachable ();
12442 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12445 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12446 htab_t dllimport_map;
12449 get_dllimport_decl (tree decl)
12451 struct tree_map *h, in;
12454 const char *prefix;
12455 size_t namelen, prefixlen;
12460 if (!dllimport_map)
12461 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12463 in.hash = htab_hash_pointer (decl);
12464 in.base.from = decl;
12465 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12466 h = (struct tree_map *) *loc;
12470 *loc = h = ggc_alloc_tree_map ();
12472 h->base.from = decl;
12473 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12474 VAR_DECL, NULL, ptr_type_node);
12475 DECL_ARTIFICIAL (to) = 1;
12476 DECL_IGNORED_P (to) = 1;
12477 DECL_EXTERNAL (to) = 1;
12478 TREE_READONLY (to) = 1;
12480 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12481 name = targetm.strip_name_encoding (name);
12482 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12483 ? "*__imp_" : "*__imp__";
12484 namelen = strlen (name);
12485 prefixlen = strlen (prefix);
12486 imp_name = (char *) alloca (namelen + prefixlen + 1);
12487 memcpy (imp_name, prefix, prefixlen);
12488 memcpy (imp_name + prefixlen, name, namelen + 1);
12490 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12491 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12492 SET_SYMBOL_REF_DECL (rtl, to);
12493 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12495 rtl = gen_const_mem (Pmode, rtl);
12496 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12498 SET_DECL_RTL (to, rtl);
12499 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
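/* The net effect: a reference to a dllimport'ed `foo' is rewritten
   into a load through the import-table pointer, roughly
   `movl __imp__foo, %eax' followed by uses of (%eax), instead of a
   direct reference to `foo' itself.  */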
12504 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12505 true if we require the result be a register. */
12508 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12513 gcc_assert (SYMBOL_REF_DECL (symbol));
12514 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12516 x = DECL_RTL (imp_decl);
12518 x = force_reg (Pmode, x);
12522 /* Try machine-dependent ways of modifying an illegitimate address
12523 to be legitimate. If we find one, return the new, valid address.
12524 This macro is used in only one place: `memory_address' in explow.c.
12526 OLDX is the address as it was before break_out_memory_refs was called.
12527 In some cases it is useful to look at this to decide what needs to be done.
12529 It is always safe for this macro to do nothing. It exists to recognize
12530 opportunities to optimize the output.
12532 For the 80386, we handle X+REG by loading X into a register R and
12533 using R+REG. R will go in a general reg and indexing will be used.
12534 However, if REG is a broken-out memory address or multiplication,
12535 nothing needs to be done because REG can certainly go in a general reg.
12537 When -fpic is used, special handling is needed for symbolic references.
12538 See comments by legitimize_pic_address in i386.c for details. */
12541 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12542 enum machine_mode mode)
12547 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12549 return legitimize_tls_address (x, (enum tls_model) log, false);
12550 if (GET_CODE (x) == CONST
12551 && GET_CODE (XEXP (x, 0)) == PLUS
12552 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12553 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12555 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12556 (enum tls_model) log, false);
12557 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12560 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12562 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12563 return legitimize_dllimport_symbol (x, true);
12564 if (GET_CODE (x) == CONST
12565 && GET_CODE (XEXP (x, 0)) == PLUS
12566 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12567 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12569 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12570 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12574 if (flag_pic && SYMBOLIC_CONST (x))
12575 return legitimize_pic_address (x, 0);
12578 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12579 return machopic_indirect_data_reference (x, 0);
12582 /* Canonicalize shifts by 0, 1, 2, 3 into a multiply. */
12583 if (GET_CODE (x) == ASHIFT
12584 && CONST_INT_P (XEXP (x, 1))
12585 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12588 log = INTVAL (XEXP (x, 1));
12589 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12590 GEN_INT (1 << log));
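/* E.g. (ashift (reg) (const_int 3)) becomes (mult (reg) (const_int 8)),
   the canonical scaled-index form that base+index*scale addressing
   and lea can match.  */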
12593 if (GET_CODE (x) == PLUS)
12595 /* Canonicalize shifts by 0, 1, 2, 3 into a multiply. */
12597 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12598 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12599 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12602 log = INTVAL (XEXP (XEXP (x, 0), 1));
12603 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12604 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12605 GEN_INT (1 << log));
12608 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12609 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12610 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12613 log = INTVAL (XEXP (XEXP (x, 1), 1));
12614 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12615 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12616 GEN_INT (1 << log));
12619 /* Put multiply first if it isn't already. */
12620 if (GET_CODE (XEXP (x, 1)) == MULT)
12622 rtx tmp = XEXP (x, 0);
12623 XEXP (x, 0) = XEXP (x, 1);
12628 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12629 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12630 created by virtual register instantiation, register elimination, and
12631 similar optimizations. */
12632 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12635 x = gen_rtx_PLUS (Pmode,
12636 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12637 XEXP (XEXP (x, 1), 0)),
12638 XEXP (XEXP (x, 1), 1));
12642 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12643 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12644 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12645 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12646 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12647 && CONSTANT_P (XEXP (x, 1)))
12650 rtx other = NULL_RTX;
12652 if (CONST_INT_P (XEXP (x, 1)))
12654 constant = XEXP (x, 1);
12655 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12657 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12659 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12660 other = XEXP (x, 1);
12668 x = gen_rtx_PLUS (Pmode,
12669 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12670 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12671 plus_constant (other, INTVAL (constant)));
12675 if (changed && ix86_legitimate_address_p (mode, x, false))
12678 if (GET_CODE (XEXP (x, 0)) == MULT)
12681 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12684 if (GET_CODE (XEXP (x, 1)) == MULT)
12687 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12691 && REG_P (XEXP (x, 1))
12692 && REG_P (XEXP (x, 0)))
12695 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12698 x = legitimize_pic_address (x, 0);
12701 if (changed && ix86_legitimate_address_p (mode, x, false))
12704 if (REG_P (XEXP (x, 0)))
12706 rtx temp = gen_reg_rtx (Pmode);
12707 rtx val = force_operand (XEXP (x, 1), temp);
12710 if (GET_MODE (val) != Pmode)
12711 val = convert_to_mode (Pmode, val, 1);
12712 emit_move_insn (temp, val);
12715 XEXP (x, 1) = temp;
12719 else if (REG_P (XEXP (x, 1)))
12721 rtx temp = gen_reg_rtx (Pmode);
12722 rtx val = force_operand (XEXP (x, 0), temp);
12725 if (GET_MODE (val) != Pmode)
12726 val = convert_to_mode (Pmode, val, 1);
12727 emit_move_insn (temp, val);
12730 XEXP (x, 0) = temp;
12738 /* Print an integer constant expression in assembler syntax. Addition
12739 and subtraction are the only arithmetic that may appear in these
12740 expressions. FILE is the stdio stream to write to, X is the rtx, and
12741 CODE is the operand print code from the output string. */
12744 output_pic_addr_const (FILE *file, rtx x, int code)
12748 switch (GET_CODE (x))
12751 gcc_assert (flag_pic);
12756 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12757 output_addr_const (file, x);
12760 const char *name = XSTR (x, 0);
12762 /* Mark the decl as referenced so that cgraph will
12763 output the function. */
12764 if (SYMBOL_REF_DECL (x))
12765 mark_decl_referenced (SYMBOL_REF_DECL (x));
12768 if (MACHOPIC_INDIRECT
12769 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12770 name = machopic_indirection_name (x, /*stub_p=*/true);
12772 assemble_name (file, name);
12774 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12775 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12776 fputs ("@PLT", file);
12783 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12784 assemble_name (asm_out_file, buf);
12788 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12792 /* This used to output parentheses around the expression,
12793 but that does not work on the 386 (either ATT or BSD assembler). */
12794 output_pic_addr_const (file, XEXP (x, 0), code);
12798 if (GET_MODE (x) == VOIDmode)
12800 /* We can use %d if the number is <32 bits and positive. */
12801 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12802 fprintf (file, "0x%lx%08lx",
12803 (unsigned long) CONST_DOUBLE_HIGH (x),
12804 (unsigned long) CONST_DOUBLE_LOW (x));
12806 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12809 /* We can't handle floating point constants;
12810 TARGET_PRINT_OPERAND must handle them. */
12811 output_operand_lossage ("floating constant misused");
12815 /* Some assemblers need integer constants to appear first. */
12816 if (CONST_INT_P (XEXP (x, 0)))
12818 output_pic_addr_const (file, XEXP (x, 0), code);
12820 output_pic_addr_const (file, XEXP (x, 1), code);
12824 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12825 output_pic_addr_const (file, XEXP (x, 1), code);
12827 output_pic_addr_const (file, XEXP (x, 0), code);
12833 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12834 output_pic_addr_const (file, XEXP (x, 0), code);
12836 output_pic_addr_const (file, XEXP (x, 1), code);
12838 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12842 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12844 bool f = i386_asm_output_addr_const_extra (file, x);
12849 gcc_assert (XVECLEN (x, 0) == 1);
12850 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12851 switch (XINT (x, 1))
12854 fputs ("@GOT", file);
12856 case UNSPEC_GOTOFF:
12857 fputs ("@GOTOFF", file);
12859 case UNSPEC_PLTOFF:
12860 fputs ("@PLTOFF", file);
12863 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12864 "(%rip)" : "[rip]", file);
12866 case UNSPEC_GOTPCREL:
12867 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12868 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12870 case UNSPEC_GOTTPOFF:
12871 /* FIXME: This might be @TPOFF in Sun ld too. */
12872 fputs ("@gottpoff", file);
12875 fputs ("@tpoff", file);
12877 case UNSPEC_NTPOFF:
12879 fputs ("@tpoff", file);
12881 fputs ("@ntpoff", file);
12883 case UNSPEC_DTPOFF:
12884 fputs ("@dtpoff", file);
12886 case UNSPEC_GOTNTPOFF:
12888 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12889 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12891 fputs ("@gotntpoff", file);
12893 case UNSPEC_INDNTPOFF:
12894 fputs ("@indntpoff", file);
12897 case UNSPEC_MACHOPIC_OFFSET:
12899 machopic_output_function_base_name (file);
12903 output_operand_lossage ("invalid UNSPEC as operand");
12909 output_operand_lossage ("invalid expression as operand");
12913 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12914 We need to emit DTP-relative relocations. */
12916 static void ATTRIBUTE_UNUSED
12917 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12919 fputs (ASM_LONG, file);
12920 output_addr_const (file, x);
12921 fputs ("@dtpoff", file);
12927 fputs (", 0", file);
12930 gcc_unreachable ();
12934 /* Return true if X is a representation of the PIC register. This copes
12935 with calls from ix86_find_base_term, where the register might have
12936 been replaced by a cselib value. */
12939 ix86_pic_register_p (rtx x)
12941 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12942 return (pic_offset_table_rtx
12943 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12945 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12948 /* Helper function for ix86_delegitimize_address.
12949 Attempt to delegitimize TLS local-exec accesses. */
12952 ix86_delegitimize_tls_address (rtx orig_x)
12954 rtx x = orig_x, unspec;
12955 struct ix86_address addr;
12957 if (!TARGET_TLS_DIRECT_SEG_REFS)
12961 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12963 if (ix86_decompose_address (x, &addr) == 0
12964 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12965 || addr.disp == NULL_RTX
12966 || GET_CODE (addr.disp) != CONST)
12968 unspec = XEXP (addr.disp, 0);
12969 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12970 unspec = XEXP (unspec, 0);
12971 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12973 x = XVECEXP (unspec, 0, 0);
12974 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12975 if (unspec != XEXP (addr.disp, 0))
12976 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12979 rtx idx = addr.index;
12980 if (addr.scale != 1)
12981 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12982 x = gen_rtx_PLUS (Pmode, idx, x);
12985 x = gen_rtx_PLUS (Pmode, addr.base, x);
12986 if (MEM_P (orig_x))
12987 x = replace_equiv_address_nv (orig_x, x);
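/* E.g. an ia32 local-exec address of the form %gs:(x@NTPOFF + 4) is
   turned back into plain x + 4 here for debug output.  */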
12991 /* In the name of slightly smaller debug output, and to cater to
12992 general assembler lossage, recognize PIC+GOTOFF and turn it back
12993 into a direct symbol reference.
12995 On Darwin, this is necessary to avoid a crash, because Darwin
12996 has a different PIC label for each routine but the DWARF debugging
12997 information is not associated with any particular routine, so it's
12998 necessary to remove references to the PIC label from RTL stored by
12999 the DWARF output code. */
13002 ix86_delegitimize_address (rtx x)
13004 rtx orig_x = delegitimize_mem_from_attrs (x);
13005 /* addend is NULL or some rtx if x is something+GOTOFF where
13006 something doesn't include the PIC register. */
13007 rtx addend = NULL_RTX;
13008 /* reg_addend is NULL or a multiple of some register. */
13009 rtx reg_addend = NULL_RTX;
13010 /* const_addend is NULL or a const_int. */
13011 rtx const_addend = NULL_RTX;
13012 /* This is the result, or NULL. */
13013 rtx result = NULL_RTX;
13022 if (GET_CODE (x) != CONST
13023 || GET_CODE (XEXP (x, 0)) != UNSPEC
13024 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13025 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13026 || !MEM_P (orig_x))
13027 return ix86_delegitimize_tls_address (orig_x);
13028 x = XVECEXP (XEXP (x, 0), 0, 0);
13029 if (GET_MODE (orig_x) != GET_MODE (x))
13031 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13039 if (GET_CODE (x) != PLUS
13040 || GET_CODE (XEXP (x, 1)) != CONST)
13041 return ix86_delegitimize_tls_address (orig_x);
13043 if (ix86_pic_register_p (XEXP (x, 0)))
13044 /* %ebx + GOT/GOTOFF */
13046 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13048 /* %ebx + %reg * scale + GOT/GOTOFF */
13049 reg_addend = XEXP (x, 0);
13050 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13051 reg_addend = XEXP (reg_addend, 1);
13052 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13053 reg_addend = XEXP (reg_addend, 0);
13056 reg_addend = NULL_RTX;
13057 addend = XEXP (x, 0);
13061 addend = XEXP (x, 0);
13063 x = XEXP (XEXP (x, 1), 0);
13064 if (GET_CODE (x) == PLUS
13065 && CONST_INT_P (XEXP (x, 1)))
13067 const_addend = XEXP (x, 1);
13071 if (GET_CODE (x) == UNSPEC
13072 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13073 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13074 result = XVECEXP (x, 0, 0);
13076 if (TARGET_MACHO && darwin_local_data_pic (x)
13077 && !MEM_P (orig_x))
13078 result = XVECEXP (x, 0, 0);
13081 return ix86_delegitimize_tls_address (orig_x);
13084 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13086 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13089 /* If the rest of original X doesn't involve the PIC register, add
13090 addend and subtract pic_offset_table_rtx. This can happen e.g.
13092 leal (%ebx, %ecx, 4), %ecx
13094 movl foo@GOTOFF(%ecx), %edx
13095 in which case we return (%ecx - %ebx) + foo. */
13096 if (pic_offset_table_rtx)
13097 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13098 pic_offset_table_rtx),
13103 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13105 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13106 if (result == NULL_RTX)
13112 /* If X is a machine specific address (i.e. a symbol or label being
13113 referenced as a displacement from the GOT implemented using an
13114 UNSPEC), then return the base term. Otherwise return X. */
13117 ix86_find_base_term (rtx x)
13123 if (GET_CODE (x) != CONST)
13125 term = XEXP (x, 0);
13126 if (GET_CODE (term) == PLUS
13127 && (CONST_INT_P (XEXP (term, 1))
13128 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13129 term = XEXP (term, 0);
13130 if (GET_CODE (term) != UNSPEC
13131 || (XINT (term, 1) != UNSPEC_GOTPCREL
13132 && XINT (term, 1) != UNSPEC_PCREL))
13135 return XVECEXP (term, 0, 0);
13138 return ix86_delegitimize_address (x);
13142 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13143 int fp, FILE *file)
13145 const char *suffix;
13147 if (mode == CCFPmode || mode == CCFPUmode)
13149 code = ix86_fp_compare_code_to_integer (code);
13153 code = reverse_condition (code);
13204 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13208 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13209 Those same assemblers have the same but opposite lossage on cmov. */
13210 if (mode == CCmode)
13211 suffix = fp ? "nbe" : "a";
13212 else if (mode == CCCmode)
13215 gcc_unreachable ();
13231 gcc_unreachable ();
13235 gcc_assert (mode == CCmode || mode == CCCmode);
13252 gcc_unreachable ();
13256 /* ??? As above. */
13257 gcc_assert (mode == CCmode || mode == CCCmode);
13258 suffix = fp ? "nb" : "ae";
13261 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13265 /* ??? As above. */
13266 if (mode == CCmode)
13268 else if (mode == CCCmode)
13269 suffix = fp ? "nb" : "ae";
13271 gcc_unreachable ();
13274 suffix = fp ? "u" : "p";
13277 suffix = fp ? "nu" : "np";
13280 gcc_unreachable ();
13282 fputs (suffix, file);
13285 /* Print the name of register X to FILE based on its machine mode and number.
13286 If CODE is 'w', pretend the mode is HImode.
13287 If CODE is 'b', pretend the mode is QImode.
13288 If CODE is 'k', pretend the mode is SImode.
13289 If CODE is 'q', pretend the mode is DImode.
13290 If CODE is 'x', pretend the mode is V4SFmode.
13291 If CODE is 't', pretend the mode is V8SFmode.
13292 If CODE is 'h', pretend the reg is the 'high' byte register.
13293 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
13294 If CODE is 'd', duplicate the operand for AVX instruction.
13298 print_reg (rtx x, int code, FILE *file)
13301 bool duplicated = code == 'd' && TARGET_AVX;
13303 gcc_assert (x == pc_rtx
13304 || (REGNO (x) != ARG_POINTER_REGNUM
13305 && REGNO (x) != FRAME_POINTER_REGNUM
13306 && REGNO (x) != FLAGS_REG
13307 && REGNO (x) != FPSR_REG
13308 && REGNO (x) != FPCR_REG));
13310 if (ASSEMBLER_DIALECT == ASM_ATT)
13315 gcc_assert (TARGET_64BIT);
13316 fputs ("rip", file);
13320 if (code == 'w' || MMX_REG_P (x))
13322 else if (code == 'b')
13324 else if (code == 'k')
13326 else if (code == 'q')
13328 else if (code == 'y')
13330 else if (code == 'h')
13332 else if (code == 'x')
13334 else if (code == 't')
13337 code = GET_MODE_SIZE (GET_MODE (x));
13339 /* Irritatingly, AMD extended registers use a different naming convention
13340 from the normal registers. */
13341 if (REX_INT_REG_P (x))
13343 gcc_assert (TARGET_64BIT);
13347 error ("extended registers have no high halves");
13350 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13353 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13356 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13359 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13362 error ("unsupported operand size for extended register");
13372 if (STACK_TOP_P (x))
13381 if (! ANY_FP_REG_P (x))
13382 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13387 reg = hi_reg_name[REGNO (x)];
13390 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13392 reg = qi_reg_name[REGNO (x)];
13395 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13397 reg = qi_high_reg_name[REGNO (x)];
13402 gcc_assert (!duplicated);
13404 fputs (hi_reg_name[REGNO (x)] + 1, file);
13409 gcc_unreachable ();
13415 if (ASSEMBLER_DIALECT == ASM_ATT)
13416 fprintf (file, ", %%%s", reg);
13418 fprintf (file, ", %s", reg);
13422 /* Locate some local-dynamic symbol still in use by this function
13423 so that we can print its name in some tls_local_dynamic_base
13427 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13431 if (GET_CODE (x) == SYMBOL_REF
13432 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13434 cfun->machine->some_ld_name = XSTR (x, 0);
13441 static const char *
13442 get_some_local_dynamic_name (void)
13446 if (cfun->machine->some_ld_name)
13447 return cfun->machine->some_ld_name;
13449 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13450 if (NONDEBUG_INSN_P (insn)
13451 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13452 return cfun->machine->some_ld_name;
13457 /* Meaning of CODE:
13458 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13459 C -- print opcode suffix for set/cmov insn.
13460 c -- like C, but print reversed condition
13461 F,f -- likewise, but for floating-point.
13462 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13464 R -- print the prefix for register names.
13465 z -- print the opcode suffix for the size of the current operand.
13466 Z -- likewise, with special suffixes for x87 instructions.
13467 * -- print a star (in certain assembler syntax)
13468 A -- print an absolute memory reference.
13469 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13470 s -- print a shift double count, followed by the assembler's argument
13472 b -- print the QImode name of the register for the indicated operand.
13473 %b0 would print %al if operands[0] is reg 0.
13474 w -- likewise, print the HImode name of the register.
13475 k -- likewise, print the SImode name of the register.
13476 q -- likewise, print the DImode name of the register.
13477 x -- likewise, print the V4SFmode name of the register.
13478 t -- likewise, print the V8SFmode name of the register.
13479 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13480 y -- print "st(0)" instead of "st" as a register.
13481 d -- print duplicated register operand for AVX instruction.
13482 D -- print condition for SSE cmp instruction.
13483 P -- if PIC, print an @PLT suffix.
13484 p -- print raw symbol name.
13485 X -- don't print any sort of PIC '@' suffix for a symbol.
13486 & -- print some in-use local-dynamic symbol name.
13487 H -- print a memory address offset by 8; used for sse high-parts
13488 Y -- print condition for XOP pcom* instruction.
13489 + -- print a branch hint as 'cs' or 'ds' prefix
13490 ; -- print a semicolon (after prefixes due to a bug in older gas).
13491 @ -- print the segment register of a thread base pointer load
13495 ix86_print_operand (FILE *file, rtx x, int code)
13502 if (ASSEMBLER_DIALECT == ASM_ATT)
13508 const char *name = get_some_local_dynamic_name ();
13510 output_operand_lossage ("'%%&' used without any "
13511 "local dynamic TLS references");
13513 assemble_name (file, name);
13518 switch (ASSEMBLER_DIALECT)
13525 /* Intel syntax. For absolute addresses, registers should not
13526 be surrounded by braces. */
13530 ix86_print_operand (file, x, 0);
13537 gcc_unreachable ();
13540 ix86_print_operand (file, x, 0);
13545 if (ASSEMBLER_DIALECT == ASM_ATT)
13550 if (ASSEMBLER_DIALECT == ASM_ATT)
13555 if (ASSEMBLER_DIALECT == ASM_ATT)
13560 if (ASSEMBLER_DIALECT == ASM_ATT)
13565 if (ASSEMBLER_DIALECT == ASM_ATT)
13570 if (ASSEMBLER_DIALECT == ASM_ATT)
13575 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13577 /* Opcodes don't get size suffixes when using Intel syntax. */
13578 if (ASSEMBLER_DIALECT == ASM_INTEL)
13581 switch (GET_MODE_SIZE (GET_MODE (x)))
13600 output_operand_lossage
13601 ("invalid operand size for operand code '%c'", code);
13606 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13608 (0, "non-integer operand used with operand code '%c'", code);
13612 /* 387 opcodes don't get size suffixes when using Intel syntax. */
13613 if (ASSEMBLER_DIALECT == ASM_INTEL)
13616 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13618 switch (GET_MODE_SIZE (GET_MODE (x)))
13621 #ifdef HAVE_AS_IX86_FILDS
13631 #ifdef HAVE_AS_IX86_FILDQ
13634 fputs ("ll", file);
13642 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13644 /* 387 opcodes don't get size suffixes
13645 if the operands are registers. */
13646 if (STACK_REG_P (x))
13649 switch (GET_MODE_SIZE (GET_MODE (x)))
13670 output_operand_lossage
13671 ("invalid operand type used with operand code '%c'", code);
13675 output_operand_lossage
13676 ("invalid operand size for operand code '%c'", code);
13694 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13696 ix86_print_operand (file, x, 0);
13697 fputs (", ", file);
13702 /* A little bit of brain damage here. The SSE compare instructions
13703 use completely different names for the comparisons than the
13704 fp conditional moves do. */
13707 switch (GET_CODE (x))
13710 fputs ("eq", file);
13713 fputs ("eq_us", file);
13716 fputs ("lt", file);
13719 fputs ("nge", file);
13722 fputs ("le", file);
13725 fputs ("ngt", file);
13728 fputs ("unord", file);
13731 fputs ("neq", file);
13734 fputs ("neq_oq", file);
13737 fputs ("ge", file);
13740 fputs ("nlt", file);
13743 fputs ("gt", file);
13746 fputs ("nle", file);
13749 fputs ("ord", file);
13752 output_operand_lossage ("operand is not a condition code, "
13753 "invalid operand code 'D'");
13759 switch (GET_CODE (x))
13763 fputs ("eq", file);
13767 fputs ("lt", file);
13771 fputs ("le", file);
13774 fputs ("unord", file);
13778 fputs ("neq", file);
13782 fputs ("nlt", file);
13786 fputs ("nle", file);
13789 fputs ("ord", file);
13792 output_operand_lossage ("operand is not a condition code, "
13793 "invalid operand code 'D'");
13799 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13800 if (ASSEMBLER_DIALECT == ASM_ATT)
13802 switch (GET_MODE (x))
13804 case HImode: putc ('w', file); break;
13806 case SFmode: putc ('l', file); break;
13808 case DFmode: putc ('q', file); break;
13809 default: gcc_unreachable ();
13816 if (!COMPARISON_P (x))
13818 output_operand_lossage ("operand is neither a constant nor a "
13819 "condition code, invalid operand code "
13823 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13826 if (!COMPARISON_P (x))
13828 output_operand_lossage ("operand is neither a constant nor a "
13829 "condition code, invalid operand code "
13833 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13834 if (ASSEMBLER_DIALECT == ASM_ATT)
13837 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13840 /* Like above, but reverse condition */
13842 /* Check to see if argument to %c is really a constant
13843 and not a condition code which needs to be reversed. */
13844 if (!COMPARISON_P (x))
13846 output_operand_lossage ("operand is neither a constant nor a "
13847 "condition code, invalid operand "
13851 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13854 if (!COMPARISON_P (x))
13856 output_operand_lossage ("operand is neither a constant nor a "
13857 "condition code, invalid operand "
13861 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13862 if (ASSEMBLER_DIALECT == ASM_ATT)
13865 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13869 /* It doesn't actually matter what mode we use here, as we're
13870 only going to use this for printing. */
13871 x = adjust_address_nv (x, DImode, 8);
13879 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13882 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13885 int pred_val = INTVAL (XEXP (x, 0));
13887 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13888 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13890 int taken = pred_val > REG_BR_PROB_BASE / 2;
13891 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13893 /* Emit hints only in the case where the default branch prediction
13894 heuristics would fail. */
13895 if (taken != cputaken)
13897 /* We use 3e (DS) prefix for taken branches and
13898 2e (CS) prefix for not taken branches. */
13900 fputs ("ds ; ", file);
13902 fputs ("cs ; ", file);
13910 switch (GET_CODE (x))
13913 fputs ("neq", file);
13916 fputs ("eq", file);
13920 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13924 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13928 fputs ("le", file);
13932 fputs ("lt", file);
13935 fputs ("unord", file);
13938 fputs ("ord", file);
13941 fputs ("ueq", file);
13944 fputs ("nlt", file);
13947 fputs ("nle", file);
13950 fputs ("ule", file);
13953 fputs ("ult", file);
13956 fputs ("une", file);
13959 output_operand_lossage ("operand is not a condition code, "
13960 "invalid operand code 'Y'");
13966 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13972 if (ASSEMBLER_DIALECT == ASM_ATT)
13975 /* The kernel uses a different segment register for performance
13976 reasons; this way a system call does not have to trash the userspace
13977 segment register, which would be expensive. */
13978 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13979 fputs ("fs", file);
13981 fputs ("gs", file);
13985 output_operand_lossage ("invalid operand code '%c'", code);
13990 print_reg (x, code, file);
13992 else if (MEM_P (x))
13994 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13995 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13996 && GET_MODE (x) != BLKmode)
13999 switch (GET_MODE_SIZE (GET_MODE (x)))
14001 case 1: size = "BYTE"; break;
14002 case 2: size = "WORD"; break;
14003 case 4: size = "DWORD"; break;
14004 case 8: size = "QWORD"; break;
14005 case 12: size = "TBYTE"; break;
14007 if (GET_MODE (x) == XFmode)
14012 case 32: size = "YMMWORD"; break;
14014 gcc_unreachable ();
14017 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14020 else if (code == 'w')
14022 else if (code == 'k')
14025 fputs (size, file);
14026 fputs (" PTR ", file);
14030 /* Avoid (%rip) for call operands. */
14031 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14032 && !CONST_INT_P (x))
14033 output_addr_const (file, x);
14034 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14035 output_operand_lossage ("invalid constraints for operand");
14037 output_address (x);
14040 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14045 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14046 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14048 if (ASSEMBLER_DIALECT == ASM_ATT)
14050 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14052 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14054 fprintf (file, "0x%08x", (unsigned int) l);
14057 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14062 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14063 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14065 if (ASSEMBLER_DIALECT == ASM_ATT)
14067 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14070 /* These float cases don't actually occur as immediate operands. */
14071 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14075 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14076 fputs (dstr, file);
14081 /* We have patterns that allow zero sets of memory, for instance.
14082 In 64-bit mode, we should probably support all 8-byte vectors,
14083 since we can in fact encode that into an immediate. */
14084 if (GET_CODE (x) == CONST_VECTOR)
14086 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14090 if (code != 'P' && code != 'p')
14092 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14094 if (ASSEMBLER_DIALECT == ASM_ATT)
14097 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14098 || GET_CODE (x) == LABEL_REF)
14100 if (ASSEMBLER_DIALECT == ASM_ATT)
14103 fputs ("OFFSET FLAT:", file);
14106 if (CONST_INT_P (x))
14107 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14108 else if (flag_pic || MACHOPIC_INDIRECT)
14109 output_pic_addr_const (file, x, code);
14111 output_addr_const (file, x);
14116 ix86_print_operand_punct_valid_p (unsigned char code)
14118 return (code == '@' || code == '*' || code == '+'
14119 || code == '&' || code == ';');
14122 /* Print a memory operand whose address is ADDR. */
14125 ix86_print_operand_address (FILE *file, rtx addr)
14127 struct ix86_address parts;
14128 rtx base, index, disp;
14130 int ok = ix86_decompose_address (addr, &parts);
14134 if (parts.base && GET_CODE (parts.base) == SUBREG)
14136 rtx tmp = SUBREG_REG (parts.base);
14137 parts.base = simplify_subreg (GET_MODE (parts.base),
14138 tmp, GET_MODE (tmp), 0);
14141 if (parts.index && GET_CODE (parts.index) == SUBREG)
14143 rtx tmp = SUBREG_REG (parts.index);
14144 parts.index = simplify_subreg (GET_MODE (parts.index),
14145 tmp, GET_MODE (tmp), 0);
14149 index = parts.index;
14151 scale = parts.scale;
14159 if (ASSEMBLER_DIALECT == ASM_ATT)
14161 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14164 gcc_unreachable ();
14167 /* Use the one-byte-shorter RIP-relative addressing for 64bit mode. */
14168 if (TARGET_64BIT && !base && !index)
14172 if (GET_CODE (disp) == CONST
14173 && GET_CODE (XEXP (disp, 0)) == PLUS
14174 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14175 symbol = XEXP (XEXP (disp, 0), 0);
14177 if (GET_CODE (symbol) == LABEL_REF
14178 || (GET_CODE (symbol) == SYMBOL_REF
14179 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14182 if (!base && !index)
14184 /* Displacement only requires special attention. */
14186 if (CONST_INT_P (disp))
14188 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14189 fputs ("ds:", file);
14190 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14193 output_pic_addr_const (file, disp, 0);
14195 output_addr_const (file, disp);
14201 /* Print SImode registers for zero-extended addresses to force
14202 addr32 prefix. Otherwise print DImode registers to avoid it. */
14204 code = ((GET_CODE (addr) == ZERO_EXTEND
14205 || GET_CODE (addr) == AND)
14209 if (ASSEMBLER_DIALECT == ASM_ATT)
14214 output_pic_addr_const (file, disp, 0);
14215 else if (GET_CODE (disp) == LABEL_REF)
14216 output_asm_label (disp);
14218 output_addr_const (file, disp);
14223 print_reg (base, code, file);
14227 print_reg (index, code, file);
14229 fprintf (file, ",%d", scale);
14235 rtx offset = NULL_RTX;
14239 /* Pull out the offset of a symbol; print any symbol itself. */
14240 if (GET_CODE (disp) == CONST
14241 && GET_CODE (XEXP (disp, 0)) == PLUS
14242 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14244 offset = XEXP (XEXP (disp, 0), 1);
14245 disp = gen_rtx_CONST (VOIDmode,
14246 XEXP (XEXP (disp, 0), 0));
14250 output_pic_addr_const (file, disp, 0);
14251 else if (GET_CODE (disp) == LABEL_REF)
14252 output_asm_label (disp);
14253 else if (CONST_INT_P (disp))
14256 output_addr_const (file, disp);
14262 print_reg (base, code, file);
14265 if (INTVAL (offset) >= 0)
14267 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14271 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14278 print_reg (index, code, file);
14280 fprintf (file, "*%d", scale);
14287 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14290 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14294 if (GET_CODE (x) != UNSPEC)
14297 op = XVECEXP (x, 0, 0);
14298 switch (XINT (x, 1))
14300 case UNSPEC_GOTTPOFF:
14301 output_addr_const (file, op);
14302 /* FIXME: This might be @TPOFF in Sun ld. */
14303 fputs ("@gottpoff", file);
14306 output_addr_const (file, op);
14307 fputs ("@tpoff", file);
14309 case UNSPEC_NTPOFF:
14310 output_addr_const (file, op);
14312 fputs ("@tpoff", file);
14314 fputs ("@ntpoff", file);
14316 case UNSPEC_DTPOFF:
14317 output_addr_const (file, op);
14318 fputs ("@dtpoff", file);
14320 case UNSPEC_GOTNTPOFF:
14321 output_addr_const (file, op);
14323 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14324 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14326 fputs ("@gotntpoff", file);
14328 case UNSPEC_INDNTPOFF:
14329 output_addr_const (file, op);
14330 fputs ("@indntpoff", file);
14333 case UNSPEC_MACHOPIC_OFFSET:
14334 output_addr_const (file, op);
14336 machopic_output_function_base_name (file);
14340 case UNSPEC_STACK_CHECK:
14344 gcc_assert (flag_split_stack);
14346 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14347 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14349 gcc_unreachable ();
14352 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14363 /* Split one or more double-mode RTL references into pairs of half-mode
14364 references. The RTL can be REG, offsettable MEM, integer constant, or
14365 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14366 split and "num" is its length. lo_half and hi_half are output arrays
14367 that parallel "operands". */
14370 split_double_mode (enum machine_mode mode, rtx operands[],
14371 int num, rtx lo_half[], rtx hi_half[])
14373 enum machine_mode half_mode;
14379 half_mode = DImode;
14382 half_mode = SImode;
14385 gcc_unreachable ();
14388 byte = GET_MODE_SIZE (half_mode);
14392 rtx op = operands[num];
14394 /* simplify_subreg refuses to split volatile memory addresses,
14395 but we still have to handle them. */
14398 lo_half[num] = adjust_address (op, half_mode, 0);
14399 hi_half[num] = adjust_address (op, half_mode, byte);
14403 lo_half[num] = simplify_gen_subreg (half_mode, op,
14404 GET_MODE (op) == VOIDmode
14405 ? mode : GET_MODE (op), 0);
14406 hi_half[num] = simplify_gen_subreg (half_mode, op,
14407 GET_MODE (op) == VOIDmode
14408 ? mode : GET_MODE (op), byte);
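/* A minimal usage sketch (illustrative, not from this file), assuming
   DEST and SRC are valid DImode operands on a 32-bit target:

	rtx ops[2] = { dest, src };
	rtx lo[2], hi[2];

	split_double_mode (DImode, ops, 2, lo, hi);
	emit_move_insn (lo[0], lo[1]);
	emit_move_insn (hi[0], hi[1]);

   This is roughly how the double-word move splitters in i386.md
   consume the lo/hi arrays.  */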
14413 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14414 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14415 is the expression of the binary operation. The output may either be
14416 emitted here, or returned to the caller, like all output_* functions.
14418 There is no guarantee that the operands are the same mode, as they
14419 might be within FLOAT or FLOAT_EXTEND expressions. */
14421 #ifndef SYSV386_COMPAT
14422 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14423 wants to fix the assemblers because that causes incompatibility
14424 with gcc. No-one wants to fix gcc because that causes
14425 incompatibility with assemblers... You can use the option of
14426 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14427 #define SYSV386_COMPAT 1
14431 output_387_binary_op (rtx insn, rtx *operands)
14433 static char buf[40];
14436 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14438 #ifdef ENABLE_CHECKING
14439 /* Even if we do not want to check the inputs, this documents the input
14440 constraints, which helps in understanding the following code. */
14441 if (STACK_REG_P (operands[0])
14442 && ((REG_P (operands[1])
14443 && REGNO (operands[0]) == REGNO (operands[1])
14444 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14445 || (REG_P (operands[2])
14446 && REGNO (operands[0]) == REGNO (operands[2])
14447 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14448 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14451 gcc_assert (is_sse);
14454 switch (GET_CODE (operands[3]))
14457 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14458 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14466 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14467 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14475 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14476 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14484 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14485 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14493 gcc_unreachable ();
14500 strcpy (buf, ssep);
14501 if (GET_MODE (operands[0]) == SFmode)
14502 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14504 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14508 strcpy (buf, ssep + 1);
14509 if (GET_MODE (operands[0]) == SFmode)
14510 strcat (buf, "ss\t{%2, %0|%0, %2}");
14512 strcat (buf, "sd\t{%2, %0|%0, %2}");
14518 switch (GET_CODE (operands[3]))
14522 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14524 rtx temp = operands[2];
14525 operands[2] = operands[1];
14526 operands[1] = temp;
14529 /* We know operands[0] == operands[1]. */
14531 if (MEM_P (operands[2]))
14537 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14539 if (STACK_TOP_P (operands[0]))
14540 /* How is it that we are storing to a dead operand[2]?
14541 Well, presumably operands[1] is dead too. We can't
14542 store the result to st(0) as st(0) gets popped on this
14543 instruction. Instead store to operands[2] (which I
14544 think has to be st(1)). st(1) will be popped later.
14545 gcc <= 2.8.1 didn't have this check and generated
14546 assembly code that the Unixware assembler rejected. */
14547 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14549 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14553 if (STACK_TOP_P (operands[0]))
14554 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14556 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14561 if (MEM_P (operands[1]))
14567 if (MEM_P (operands[2]))
14573 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14576 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14577 derived assemblers, confusingly reverse the direction of
14578 the operation for fsub{r} and fdiv{r} when the
14579 destination register is not st(0). The Intel assembler
14580 doesn't have this brain damage. Read !SYSV386_COMPAT to
14581 figure out what the hardware really does. */
14582 if (STACK_TOP_P (operands[0]))
14583 p = "{p\t%0, %2|rp\t%2, %0}";
14585 p = "{rp\t%2, %0|p\t%0, %2}";
14587 if (STACK_TOP_P (operands[0]))
14588 /* As above for fmul/fadd, we can't store to st(0). */
14589 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14591 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14596 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14599 if (STACK_TOP_P (operands[0]))
14600 p = "{rp\t%0, %1|p\t%1, %0}";
14602 p = "{p\t%1, %0|rp\t%0, %1}";
14604 if (STACK_TOP_P (operands[0]))
14605 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14607 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14612 if (STACK_TOP_P (operands[0]))
14614 if (STACK_TOP_P (operands[1]))
14615 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14617 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14620 else if (STACK_TOP_P (operands[1]))
14623 p = "{\t%1, %0|r\t%0, %1}";
14625 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14631 p = "{r\t%2, %0|\t%0, %2}";
14633 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14639 gcc_unreachable ();
14646 /* Return the mode needed for an entity in the optimize_mode_switching pass. */
14649 ix86_mode_needed (int entity, rtx insn)
14651 enum attr_i387_cw mode;
14653 /* The mode UNINITIALIZED is used to store the control word after a
14654 function call or ASM pattern. The mode ANY specifies that the function
14655 has no requirements on the control word and makes no changes in the
14656 bits we are interested in. */
14659 || (NONJUMP_INSN_P (insn)
14660 && (asm_noperands (PATTERN (insn)) >= 0
14661 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14662 return I387_CW_UNINITIALIZED;
14664 if (recog_memoized (insn) < 0)
14665 return I387_CW_ANY;
14667 mode = get_attr_i387_cw (insn);
14672 if (mode == I387_CW_TRUNC)
14677 if (mode == I387_CW_FLOOR)
14682 if (mode == I387_CW_CEIL)
14687 if (mode == I387_CW_MASK_PM)
14692 gcc_unreachable ();
14695 return I387_CW_ANY;
14698 /* Output code to initialize control word copies used by trunc?f?i and
14699 rounding patterns. CURRENT_MODE is set to the current control word,
14700 while NEW_MODE is set to the new control word. */
14703 emit_i387_cw_initialization (int mode)
14705 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14708 enum ix86_stack_slot slot;
14710 rtx reg = gen_reg_rtx (HImode);
14712 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14713 emit_move_insn (reg, copy_rtx (stored_mode));
14715 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14716 || optimize_function_for_size_p (cfun))
14720 case I387_CW_TRUNC:
14721 /* round toward zero (truncate) */
14722 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14723 slot = SLOT_CW_TRUNC;
14726 case I387_CW_FLOOR:
14727 /* round down toward -oo */
14728 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14729 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14730 slot = SLOT_CW_FLOOR;
14734 /* round up toward +oo */
14735 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14736 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14737 slot = SLOT_CW_CEIL;
14740 case I387_CW_MASK_PM:
14741 /* mask precision exception for nearbyint() */
14742 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14743 slot = SLOT_CW_MASK_PM;
14747 gcc_unreachable ();
14754 case I387_CW_TRUNC:
14755 /* round toward zero (truncate) */
14756 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14757 slot = SLOT_CW_TRUNC;
14760 case I387_CW_FLOOR:
14761 /* round down toward -oo */
14762 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14763 slot = SLOT_CW_FLOOR;
14767 /* round up toward +oo */
14768 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14769 slot = SLOT_CW_CEIL;
14772 case I387_CW_MASK_PM:
14773 /* mask precision exception for nearbyint() */
14774 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14775 slot = SLOT_CW_MASK_PM;
14779 gcc_unreachable ();
14783 gcc_assert (slot < MAX_386_STACK_LOCALS);
14785 new_mode = assign_386_stack_local (HImode, slot);
14786 emit_move_insn (new_mode, reg);
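/* For reference: bits 10-11 of the x87 control word select the
   rounding mode (00 = to nearest, 01 = down, 10 = up, 11 = toward
   zero), hence the 0x0400/0x0800/0x0c00 masks above (the insv variant
   writes the same field through the high byte, hence 0x4/0x8/0xc);
   bit 5 (0x0020) masks the precision exception for nearbyint.  */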
14789 /* Output code for INSN to convert a float to a signed int. OPERANDS
14790 are the insn operands. The output may be [HSD]Imode and the input
14791 operand may be [SDX]Fmode. */
14794 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
14796 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14797 int dimode_p = GET_MODE (operands[0]) == DImode;
14798 int round_mode = get_attr_i387_cw (insn);
14800 /* Jump through a hoop or two for DImode, since the hardware has no
14801 non-popping instruction. We used to do this a different way, but
14802 that was somewhat fragile and broke with post-reload splitters. */
14803 if ((dimode_p || fisttp) && !stack_top_dies)
14804 output_asm_insn ("fld\t%y1", operands);
14806 gcc_assert (STACK_TOP_P (operands[1]));
14807 gcc_assert (MEM_P (operands[0]));
14808 gcc_assert (GET_MODE (operands[1]) != TFmode);
14811 output_asm_insn ("fisttp%Z0\t%0", operands);
14814 if (round_mode != I387_CW_ANY)
14815 output_asm_insn ("fldcw\t%3", operands);
14816 if (stack_top_dies || dimode_p)
14817 output_asm_insn ("fistp%Z0\t%0", operands);
14819 output_asm_insn ("fist%Z0\t%0", operands);
14820 if (round_mode != I387_CW_ANY)
14821 output_asm_insn ("fldcw\t%2", operands);
14827 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14828 have the values zero or one, indicates the ffreep insn's operand
14829 from the OPERANDS array. */
14831 static const char *
14832 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14834 if (TARGET_USE_FFREEP)
14835 #ifdef HAVE_AS_IX86_FFREEP
14836 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14839 static char retval[32];
14840 int regno = REGNO (operands[opno]);
14842 gcc_assert (FP_REGNO_P (regno));
14844 regno -= FIRST_STACK_REG;
14846 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
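/* ffreep %st(i) encodes as the byte pair DF C0+i; the .short above
   emits exactly that pair little-endian, e.g. 0xc0df comes out as
   DF C0, i.e. ffreep %st(0).  */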
14851 return opno ? "fstp\t%y1" : "fstp\t%y0";
14855 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14856 should be used. UNORDERED_P is true when fucom should be used. */
14859 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
14861 int stack_top_dies;
14862 rtx cmp_op0, cmp_op1;
14863 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14867 cmp_op0 = operands[0];
14868 cmp_op1 = operands[1];
14872 cmp_op0 = operands[1];
14873 cmp_op1 = operands[2];
14878 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14879 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14880 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14881 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14883 if (GET_MODE (operands[0]) == SFmode)
14885 return &ucomiss[TARGET_AVX ? 0 : 1];
14887 return &comiss[TARGET_AVX ? 0 : 1];
14890 return &ucomisd[TARGET_AVX ? 0 : 1];
14892 return &comisd[TARGET_AVX ? 0 : 1];
14895 gcc_assert (STACK_TOP_P (cmp_op0));
14897 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14899 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14901 if (stack_top_dies)
14903 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14904 return output_387_ffreep (operands, 1);
14907 return "ftst\n\tfnstsw\t%0";
14910 if (STACK_REG_P (cmp_op1)
14912 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14913 && REGNO (cmp_op1) != FIRST_STACK_REG)
14915 /* If the top of the 387 stack dies, and the other operand
14916 is also a stack register that dies, then this must be an
14917 `fcompp' float compare. */
14921 /* There is no double popping fcomi variant. Fortunately,
14922 eflags is immune from the fstp's cc clobbering. */
14924 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14926 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14927 return output_387_ffreep (operands, 0);
14932 return "fucompp\n\tfnstsw\t%0";
14934 return "fcompp\n\tfnstsw\t%0";
14939 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14941 static const char * const alt[16] =
14943 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14944 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14945 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14946 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14948 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14949 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14953 "fcomi\t{%y1, %0|%0, %y1}",
14954 "fcomip\t{%y1, %0|%0, %y1}",
14955 "fucomi\t{%y1, %0|%0, %y1}",
14956 "fucomip\t{%y1, %0|%0, %y1}",
14967 mask = eflags_p << 3;
14968 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14969 mask |= unordered_p << 1;
14970 mask |= stack_top_dies;
14972 gcc_assert (mask < 16);
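/* E.g. an eflags-setting unordered compare whose stack top dies gives
   mask = 8 | 0 | 2 | 1 = 11, selecting "fucomip" above.  */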
14981 ix86_output_addr_vec_elt (FILE *file, int value)
14983 const char *directive = ASM_LONG;
14987 directive = ASM_QUAD;
14989 gcc_assert (!TARGET_64BIT);
14992 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14996 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14998 const char *directive = ASM_LONG;
15001 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15002 directive = ASM_QUAD;
15004 gcc_assert (!TARGET_64BIT);
15006 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15007 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15008 fprintf (file, "%s%s%d-%s%d\n",
15009 directive, LPREFIX, value, LPREFIX, rel);
15010 else if (HAVE_AS_GOTOFF_IN_DATA)
15011 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15013 else if (TARGET_MACHO)
15015 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15016 machopic_output_function_base_name (file);
15021 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15022 GOT_SYMBOL_NAME, LPREFIX, value);
15025 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15029 ix86_expand_clear (rtx dest)
15033 /* We play register width games, which are only valid after reload. */
15034 gcc_assert (reload_completed);
15036 /* Avoid HImode and its attendant prefix byte. */
15037 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15038 dest = gen_rtx_REG (SImode, REGNO (dest));
15039 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15041 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15042 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15044 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15045 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
15051 /* X is an unchanging MEM. If it is a constant pool reference, return
15052 the constant pool rtx, else NULL. */
15055 maybe_get_pool_constant (rtx x)
15057 x = ix86_delegitimize_address (XEXP (x, 0));
15059 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15060 return get_pool_constant (x);
15066 ix86_expand_move (enum machine_mode mode, rtx operands[])
15069 enum tls_model model;
15074 if (GET_CODE (op1) == SYMBOL_REF)
15076 model = SYMBOL_REF_TLS_MODEL (op1);
15079 op1 = legitimize_tls_address (op1, model, true);
15080 op1 = force_operand (op1, op0);
15083 if (GET_MODE (op1) != mode)
15084 op1 = convert_to_mode (mode, op1, 1);
15086 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15087 && SYMBOL_REF_DLLIMPORT_P (op1))
15088 op1 = legitimize_dllimport_symbol (op1, false);
15090 else if (GET_CODE (op1) == CONST
15091 && GET_CODE (XEXP (op1, 0)) == PLUS
15092 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15094 rtx addend = XEXP (XEXP (op1, 0), 1);
15095 rtx symbol = XEXP (XEXP (op1, 0), 0);
15098 model = SYMBOL_REF_TLS_MODEL (symbol);
15100 tmp = legitimize_tls_address (symbol, model, true);
15101 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15102 && SYMBOL_REF_DLLIMPORT_P (symbol))
15103 tmp = legitimize_dllimport_symbol (symbol, true);
15107 tmp = force_operand (tmp, NULL);
15108 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15109 op0, 1, OPTAB_DIRECT);
15112 if (GET_MODE (tmp) != mode)
15113 op1 = convert_to_mode (mode, tmp, 1);
15117 if ((flag_pic || MACHOPIC_INDIRECT)
15118 && symbolic_operand (op1, mode))
15120 if (TARGET_MACHO && !TARGET_64BIT)
15123 /* dynamic-no-pic */
15124 if (MACHOPIC_INDIRECT)
15126 rtx temp = ((reload_in_progress
15127 || ((op0 && REG_P (op0))
15129 ? op0 : gen_reg_rtx (Pmode));
15130 op1 = machopic_indirect_data_reference (op1, temp);
15132 op1 = machopic_legitimize_pic_address (op1, mode,
15133 temp == op1 ? 0 : temp);
15135 if (op0 != op1 && GET_CODE (op0) != MEM)
15137 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15141 if (GET_CODE (op0) == MEM)
15142 op1 = force_reg (Pmode, op1);
15146 if (GET_CODE (temp) != REG)
15147 temp = gen_reg_rtx (Pmode);
15148 temp = legitimize_pic_address (op1, temp);
15153 /* dynamic-no-pic */
15159 op1 = force_reg (mode, op1);
15160 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15162 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15163 op1 = legitimize_pic_address (op1, reg);
15166 if (GET_MODE (op1) != mode)
15167 op1 = convert_to_mode (mode, op1, 1);
15174 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15175 || !push_operand (op0, mode))
15177 op1 = force_reg (mode, op1);
15179 if (push_operand (op0, mode)
15180 && ! general_no_elim_operand (op1, mode))
15181 op1 = copy_to_mode_reg (mode, op1);
/* Force large constants in 64bit compilation into a register
   to get them CSEed.  */
15185 if (can_create_pseudo_p ()
15186 && (mode == DImode) && TARGET_64BIT
15187 && immediate_operand (op1, mode)
15188 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15189 && !register_operand (op0, mode)
15191 op1 = copy_to_mode_reg (mode, op1);
15193 if (can_create_pseudo_p ()
15194 && FLOAT_MODE_P (mode)
15195 && GET_CODE (op1) == CONST_DOUBLE)
15197 /* If we are loading a floating point constant to a register,
15198 force the value to memory now, since we'll get better code
15199 out the back end. */
15201 op1 = validize_mem (force_const_mem (mode, op1));
15202 if (!register_operand (op0, mode))
15204 rtx temp = gen_reg_rtx (mode);
15205 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15206 emit_move_insn (op0, temp);
15212 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
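/* Editorial usage sketch: the move expanders in i386.md delegate
   here, e.g.

     ix86_expand_move (SImode, operands);
     DONE;

   so all of the TLS, dllimport and PIC legitimization above runs
   before the final SET is emitted.  */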
15216 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15218 rtx op0 = operands[0], op1 = operands[1];
15219 unsigned int align = GET_MODE_ALIGNMENT (mode);
/* Force constants other than zero into memory.  We do not know how
   the instructions used to build constants modify the upper 64 bits
   of the register; once we have that information we may be able
   to handle some of them more efficiently.  */
15225 if (can_create_pseudo_p ()
15226 && register_operand (op0, mode)
15227 && (CONSTANT_P (op1)
15228 || (GET_CODE (op1) == SUBREG
15229 && CONSTANT_P (SUBREG_REG (op1))))
15230 && !standard_sse_constant_p (op1))
15231 op1 = validize_mem (force_const_mem (mode, op1));
/* We need to check memory alignment for SSE mode since attributes
   can make operands unaligned.  */
15235 if (can_create_pseudo_p ()
15236 && SSE_REG_MODE_P (mode)
15237 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15238 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15242 /* ix86_expand_vector_move_misalign() does not like constants ... */
15243 if (CONSTANT_P (op1)
15244 || (GET_CODE (op1) == SUBREG
15245 && CONSTANT_P (SUBREG_REG (op1))))
15246 op1 = validize_mem (force_const_mem (mode, op1));
15248 /* ... nor both arguments in memory. */
15249 if (!register_operand (op0, mode)
15250 && !register_operand (op1, mode))
15251 op1 = force_reg (mode, op1);
15253 tmp[0] = op0; tmp[1] = op1;
15254 ix86_expand_vector_move_misalign (mode, tmp);
15258 /* Make operand1 a register if it isn't already. */
15259 if (can_create_pseudo_p ()
15260 && !register_operand (op0, mode)
15261 && !register_operand (op1, mode))
15263 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15267 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15270 /* Split 32-byte AVX unaligned load and store if needed. */
15273 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15276 rtx (*extract) (rtx, rtx, rtx);
15277 rtx (*move_unaligned) (rtx, rtx);
15278 enum machine_mode mode;
15280 switch (GET_MODE (op0))
15283 gcc_unreachable ();
15285 extract = gen_avx_vextractf128v32qi;
15286 move_unaligned = gen_avx_movdqu256;
15290 extract = gen_avx_vextractf128v8sf;
15291 move_unaligned = gen_avx_movups256;
15295 extract = gen_avx_vextractf128v4df;
15296 move_unaligned = gen_avx_movupd256;
15301 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15303 rtx r = gen_reg_rtx (mode);
15304 m = adjust_address (op1, mode, 0);
15305 emit_move_insn (r, m);
15306 m = adjust_address (op1, mode, 16);
15307 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15308 emit_move_insn (op0, r);
15310 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15312 m = adjust_address (op0, mode, 0);
15313 emit_insn (extract (m, op1, const0_rtx));
15314 m = adjust_address (op0, mode, 16);
15315 emit_insn (extract (m, op1, const1_rtx));
15318 emit_insn (move_unaligned (op0, op1));
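/* Editorial sketch of the result: a split unaligned 32-byte load
   becomes two 16-byte loads at offsets 0 and 16 recombined via
   VEC_CONCAT (i.e. vinsertf128); a split store extracts the two
   halves with vextractf128.  Otherwise a single 256-bit
   vmovups/vmovupd/vmovdqu is emitted.  */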
15321 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15322 straight to ix86_expand_vector_move. */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg; else movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg; else movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg
     else if (x86_sse_partial_reg_dependency == true)
       xorps reg, reg; movlps mem, reg; movhps mem+8, reg
     else movlps mem, reg; movhps mem+8, reg

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg
     else if (x86_sse_split_regs == true)
       movlpd mem, reg; movhpd mem+8, reg
     else movsd mem, reg; movhpd mem+8, reg  */
15374 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15383 switch (GET_MODE_CLASS (mode))
15385 case MODE_VECTOR_INT:
15387 switch (GET_MODE_SIZE (mode))
15390 /* If we're optimizing for size, movups is the smallest. */
15391 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15393 op0 = gen_lowpart (V4SFmode, op0);
15394 op1 = gen_lowpart (V4SFmode, op1);
15395 emit_insn (gen_sse_movups (op0, op1));
15398 op0 = gen_lowpart (V16QImode, op0);
15399 op1 = gen_lowpart (V16QImode, op1);
15400 emit_insn (gen_sse2_movdqu (op0, op1));
15403 op0 = gen_lowpart (V32QImode, op0);
15404 op1 = gen_lowpart (V32QImode, op1);
15405 ix86_avx256_split_vector_move_misalign (op0, op1);
15408 gcc_unreachable ();
15411 case MODE_VECTOR_FLOAT:
15412 op0 = gen_lowpart (mode, op0);
15413 op1 = gen_lowpart (mode, op1);
15418 emit_insn (gen_sse_movups (op0, op1));
15421 ix86_avx256_split_vector_move_misalign (op0, op1);
15424 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15426 op0 = gen_lowpart (V4SFmode, op0);
15427 op1 = gen_lowpart (V4SFmode, op1);
15428 emit_insn (gen_sse_movups (op0, op1));
15431 emit_insn (gen_sse2_movupd (op0, op1));
15434 ix86_avx256_split_vector_move_misalign (op0, op1);
15437 gcc_unreachable ();
15442 gcc_unreachable ();
15450 /* If we're optimizing for size, movups is the smallest. */
15451 if (optimize_insn_for_size_p ()
15452 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15454 op0 = gen_lowpart (V4SFmode, op0);
15455 op1 = gen_lowpart (V4SFmode, op1);
15456 emit_insn (gen_sse_movups (op0, op1));
/* ??? If we have typed data, then it would appear that using
   movdqu is the only way to get unaligned data loaded with
   integer type instructions.  */
15463 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15465 op0 = gen_lowpart (V16QImode, op0);
15466 op1 = gen_lowpart (V16QImode, op1);
15467 emit_insn (gen_sse2_movdqu (op0, op1));
15471 if (TARGET_SSE2 && mode == V2DFmode)
15475 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15477 op0 = gen_lowpart (V2DFmode, op0);
15478 op1 = gen_lowpart (V2DFmode, op1);
15479 emit_insn (gen_sse2_movupd (op0, op1));
15483 /* When SSE registers are split into halves, we can avoid
15484 writing to the top half twice. */
15485 if (TARGET_SSE_SPLIT_REGS)
15487 emit_clobber (op0);
15492 /* ??? Not sure about the best option for the Intel chips.
15493 The following would seem to satisfy; the register is
15494 entirely cleared, breaking the dependency chain. We
15495 then store to the upper half, with a dependency depth
15496 of one. A rumor has it that Intel recommends two movsd
15497 followed by an unpacklpd, but this is unconfirmed. And
15498 given that the dependency depth of the unpacklpd would
15499 still be one, I'm not sure why this would be better. */
15500 zero = CONST0_RTX (V2DFmode);
15503 m = adjust_address (op1, DFmode, 0);
15504 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15505 m = adjust_address (op1, DFmode, 8);
15506 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15510 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15512 op0 = gen_lowpart (V4SFmode, op0);
15513 op1 = gen_lowpart (V4SFmode, op1);
15514 emit_insn (gen_sse_movups (op0, op1));
15518 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15519 emit_move_insn (op0, CONST0_RTX (mode));
15521 emit_clobber (op0);
15523 if (mode != V4SFmode)
15524 op0 = gen_lowpart (V4SFmode, op0);
15525 m = adjust_address (op1, V2SFmode, 0);
15526 emit_insn (gen_sse_loadlps (op0, op0, m));
15527 m = adjust_address (op1, V2SFmode, 8);
15528 emit_insn (gen_sse_loadhps (op0, op0, m));
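/* Editorial example of the fallback just above, loading V4SF from
   (%eax); the clearing xorps is emitted only for
   TARGET_SSE_PARTIAL_REG_DEPENDENCY:
     xorps  %xmm0, %xmm0
     movlps (%eax), %xmm0
     movhps 8(%eax), %xmm0   */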
15531 else if (MEM_P (op0))
15533 /* If we're optimizing for size, movups is the smallest. */
15534 if (optimize_insn_for_size_p ()
15535 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15537 op0 = gen_lowpart (V4SFmode, op0);
15538 op1 = gen_lowpart (V4SFmode, op1);
15539 emit_insn (gen_sse_movups (op0, op1));
/* ??? Similar to the above, only less clear because of the
   "typeless stores" issue.  */
15545 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15546 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15548 op0 = gen_lowpart (V16QImode, op0);
15549 op1 = gen_lowpart (V16QImode, op1);
15550 emit_insn (gen_sse2_movdqu (op0, op1));
15554 if (TARGET_SSE2 && mode == V2DFmode)
15556 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15558 op0 = gen_lowpart (V2DFmode, op0);
15559 op1 = gen_lowpart (V2DFmode, op1);
15560 emit_insn (gen_sse2_movupd (op0, op1));
15564 m = adjust_address (op0, DFmode, 0);
15565 emit_insn (gen_sse2_storelpd (m, op1));
15566 m = adjust_address (op0, DFmode, 8);
15567 emit_insn (gen_sse2_storehpd (m, op1));
15572 if (mode != V4SFmode)
15573 op1 = gen_lowpart (V4SFmode, op1);
15575 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15577 op0 = gen_lowpart (V4SFmode, op0);
15578 emit_insn (gen_sse_movups (op0, op1));
15582 m = adjust_address (op0, V2SFmode, 0);
15583 emit_insn (gen_sse_storelps (m, op1));
15584 m = adjust_address (op0, V2SFmode, 8);
15585 emit_insn (gen_sse_storehps (m, op1));
15590 gcc_unreachable ();
15593 /* Expand a push in MODE. This is some mode for which we do not support
15594 proper push instructions, at least from the registers that we expect
15595 the value to live in. */
15598 ix86_expand_push (enum machine_mode mode, rtx x)
15602 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15603 GEN_INT (-GET_MODE_SIZE (mode)),
15604 stack_pointer_rtx, 1, OPTAB_DIRECT);
15605 if (tmp != stack_pointer_rtx)
15606 emit_move_insn (stack_pointer_rtx, tmp);
15608 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
/* When we push an operand onto stack, it has to be aligned at least
   at the function argument boundary.  However since we don't have
   the argument type, we can't determine the actual argument
   boundary.  */
15614 emit_move_insn (tmp, x);
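/* Editorial example: on ia32, pushing a DFmode value X expands to
     subl $8, %esp
     <move of X to (%esp)>
   since there is no push instruction operating on x87 or SSE
   registers.  */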
15617 /* Helper function of ix86_fixup_binary_operands to canonicalize
15618 operand order. Returns true if the operands should be swapped. */
15621 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15624 rtx dst = operands[0];
15625 rtx src1 = operands[1];
15626 rtx src2 = operands[2];
15628 /* If the operation is not commutative, we can't do anything. */
15629 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15632 /* Highest priority is that src1 should match dst. */
15633 if (rtx_equal_p (dst, src1))
15635 if (rtx_equal_p (dst, src2))
15638 /* Next highest priority is that immediate constants come second. */
15639 if (immediate_operand (src2, mode))
15641 if (immediate_operand (src1, mode))
15644 /* Lowest priority is that memory references should come second. */
15654 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15655 destination to use for the operation. If different from the true
15656 destination in operands[0], a copy operation will be required. */
15659 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15662 rtx dst = operands[0];
15663 rtx src1 = operands[1];
15664 rtx src2 = operands[2];
15666 /* Canonicalize operand order. */
15667 if (ix86_swap_binary_operands_p (code, mode, operands))
15671 /* It is invalid to swap operands of different modes. */
15672 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15679 /* Both source operands cannot be in memory. */
15680 if (MEM_P (src1) && MEM_P (src2))
15682 /* Optimization: Only read from memory once. */
15683 if (rtx_equal_p (src1, src2))
15685 src2 = force_reg (mode, src2);
15689 src2 = force_reg (mode, src2);
15692 /* If the destination is memory, and we do not have matching source
15693 operands, do things in registers. */
15694 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15695 dst = gen_reg_rtx (mode);
15697 /* Source 1 cannot be a constant. */
15698 if (CONSTANT_P (src1))
15699 src1 = force_reg (mode, src1);
15701 /* Source 1 cannot be a non-matching memory. */
15702 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15703 src1 = force_reg (mode, src1);
15705 operands[1] = src1;
15706 operands[2] = src2;
15710 /* Similarly, but assume that the destination has already been
15711 set up properly. */
15714 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15715 enum machine_mode mode, rtx operands[])
15717 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15718 gcc_assert (dst == operands[0]);
/* Attempt to expand a binary operator.  Make the expansion closer to
   the actual machine than just general_operand, which will allow 3
   separate memory references (one output, two input) in a single
   insn.  */
15726 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15729 rtx src1, src2, dst, op, clob;
15731 dst = ix86_fixup_binary_operands (code, mode, operands);
15732 src1 = operands[1];
15733 src2 = operands[2];
15735 /* Emit the instruction. */
15737 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15738 if (reload_in_progress)
15740 /* Reload doesn't know about the flags register, and doesn't know that
15741 it doesn't want to clobber it. We can only do this with PLUS. */
15742 gcc_assert (code == PLUS);
15745 else if (reload_completed
15747 && !rtx_equal_p (dst, src1))
15749 /* This is going to be an LEA; avoid splitting it later. */
15754 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15755 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15758 /* Fix up the destination if needed. */
15759 if (dst != operands[0])
15760 emit_move_insn (operands[0], dst);
15763 /* Return TRUE or FALSE depending on whether the binary operator meets the
15764 appropriate constraints. */
15767 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15770 rtx dst = operands[0];
15771 rtx src1 = operands[1];
15772 rtx src2 = operands[2];
15774 /* Both source operands cannot be in memory. */
15775 if (MEM_P (src1) && MEM_P (src2))
15778 /* Canonicalize operand order for commutative operators. */
15779 if (ix86_swap_binary_operands_p (code, mode, operands))
15786 /* If the destination is memory, we must have a matching source operand. */
15787 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15790 /* Source 1 cannot be a constant. */
15791 if (CONSTANT_P (src1))
15794 /* Source 1 cannot be a non-matching memory. */
15795 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15796 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15797 return (code == AND
15800 || (TARGET_64BIT && mode == DImode))
15801 && satisfies_constraint_L (src2));
/* Attempt to expand a unary operator.  Make the expansion closer to
   the actual machine than just general_operand, which will allow 2
   separate memory references (one output, one input) in a single
   insn.  */
15811 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15814 int matching_memory;
15815 rtx src, dst, op, clob;
15820 /* If the destination is memory, and we do not have matching source
15821 operands, do things in registers. */
15822 matching_memory = 0;
15825 if (rtx_equal_p (dst, src))
15826 matching_memory = 1;
15828 dst = gen_reg_rtx (mode);
15831 /* When source operand is memory, destination must match. */
15832 if (MEM_P (src) && !matching_memory)
15833 src = force_reg (mode, src);
15835 /* Emit the instruction. */
15837 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15838 if (reload_in_progress || code == NOT)
15840 /* Reload doesn't know about the flags register, and doesn't know that
15841 it doesn't want to clobber it. */
15842 gcc_assert (code == NOT);
15847 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15848 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15851 /* Fix up the destination if needed. */
15852 if (dst != operands[0])
15853 emit_move_insn (operands[0], dst);
15856 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15857 divisor are within the range [0-255]. */
15860 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15863 rtx end_label, qimode_label;
15864 rtx insn, div, mod;
15865 rtx scratch, tmp0, tmp1, tmp2;
15866 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15867 rtx (*gen_zero_extend) (rtx, rtx);
15868 rtx (*gen_test_ccno_1) (rtx, rtx);
15873 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15874 gen_test_ccno_1 = gen_testsi_ccno_1;
15875 gen_zero_extend = gen_zero_extendqisi2;
15878 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15879 gen_test_ccno_1 = gen_testdi_ccno_1;
15880 gen_zero_extend = gen_zero_extendqidi2;
15883 gcc_unreachable ();
15886 end_label = gen_label_rtx ();
15887 qimode_label = gen_label_rtx ();
15889 scratch = gen_reg_rtx (mode);
/* Use 8bit unsigned divmod if dividend and divisor are within
   the range [0-255].  */
15893 emit_move_insn (scratch, operands[2]);
15894 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15895 scratch, 1, OPTAB_DIRECT);
15896 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15897 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15898 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15899 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15900 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15902 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15903 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15904 JUMP_LABEL (insn) = qimode_label;
/* Generate original signed/unsigned divmod.  */
15907 div = gen_divmod4_1 (operands[0], operands[1],
15908 operands[2], operands[3]);
15911 /* Branch to the end. */
15912 emit_jump_insn (gen_jump (end_label));
15915 /* Generate 8bit unsigned divide. */
15916 emit_label (qimode_label);
15917 /* Don't use operands[0] for result of 8bit divide since not all
15918 registers support QImode ZERO_EXTRACT. */
15919 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15920 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15921 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15922 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15926 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15927 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15931 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15932 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15935 /* Extract remainder from AH. */
15936 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15937 if (REG_P (operands[1]))
15938 insn = emit_move_insn (operands[1], tmp1);
/* Need a new scratch register since the old one has the result
   of the 8bit divide.  */
15943 scratch = gen_reg_rtx (mode);
15944 emit_move_insn (scratch, tmp1);
15945 insn = emit_move_insn (operands[1], scratch);
15947 set_unique_reg_note (insn, REG_EQUAL, mod);
15949 /* Zero extend quotient from AL. */
15950 tmp1 = gen_lowpart (QImode, tmp0);
15951 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15952 set_unique_reg_note (insn, REG_EQUAL, div);
15954 emit_label (end_label);
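/* Editorial sketch (register names illustrative) of the emitted
   control flow for a 32-bit unsigned divide:
     mov    dividend, scratch
     or     divisor, scratch
     test   $0xFFFFFF00, scratch
     je     .Lqimode            ; both operands fit in 8 bits
     <full 32-bit div/idiv>
     jmp    .Lend
   .Lqimode:
     div    <divisor byte>      ; AL = quotient, AH = remainder
     <zero-extend AL/AH into the result registers>
   .Lend:                                                         */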
15957 #define LEA_SEARCH_THRESHOLD 12
/* Search backward for a non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. LEA_SEARCH_THRESHOLD instructions have been examined, or
   2. the BB boundary is reached, or
   3. an agu definition is reached.
   Returns the distance between the non-agu definition point and INSN.
   Returns -1 if no definition point is found.  */
15968 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15971 basic_block bb = BLOCK_FOR_INSN (insn);
15974 enum attr_type insn_type;
15976 if (insn != BB_HEAD (bb))
15978 rtx prev = PREV_INSN (insn);
15979 while (prev && distance < LEA_SEARCH_THRESHOLD)
15981 if (NONDEBUG_INSN_P (prev))
15984 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15985 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15986 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15987 && (regno1 == DF_REF_REGNO (*def_rec)
15988 || regno2 == DF_REF_REGNO (*def_rec)))
15990 insn_type = get_attr_type (prev);
15991 if (insn_type != TYPE_LEA)
15995 if (prev == BB_HEAD (bb))
15997 prev = PREV_INSN (prev);
16001 if (distance < LEA_SEARCH_THRESHOLD)
16005 bool simple_loop = false;
16007 FOR_EACH_EDGE (e, ei, bb->preds)
16010 simple_loop = true;
16016 rtx prev = BB_END (bb);
16019 && distance < LEA_SEARCH_THRESHOLD)
16021 if (NONDEBUG_INSN_P (prev))
16024 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16025 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16026 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16027 && (regno1 == DF_REF_REGNO (*def_rec)
16028 || regno2 == DF_REF_REGNO (*def_rec)))
16030 insn_type = get_attr_type (prev);
16031 if (insn_type != TYPE_LEA)
16035 prev = PREV_INSN (prev);
16043 /* get_attr_type may modify recog data. We want to make sure
16044 that recog data is valid for instruction INSN, on which
16045 distance_non_agu_define is called. INSN is unchanged here. */
16046 extract_insn_cached (insn);
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in a memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD instructions, or if
   REGNO0 is set first.  */
16055 distance_agu_use (unsigned int regno0, rtx insn)
16057 basic_block bb = BLOCK_FOR_INSN (insn);
16062 if (insn != BB_END (bb))
16064 rtx next = NEXT_INSN (insn);
16065 while (next && distance < LEA_SEARCH_THRESHOLD)
16067 if (NONDEBUG_INSN_P (next))
16071 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16072 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16073 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16074 && regno0 == DF_REF_REGNO (*use_rec))
16076 /* Return DISTANCE if OP0 is used in memory
16077 address in NEXT. */
16081 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16082 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16083 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16084 && regno0 == DF_REF_REGNO (*def_rec))
16086 /* Return -1 if OP0 is set in NEXT. */
16090 if (next == BB_END (bb))
16092 next = NEXT_INSN (next);
16096 if (distance < LEA_SEARCH_THRESHOLD)
16100 bool simple_loop = false;
16102 FOR_EACH_EDGE (e, ei, bb->succs)
16105 simple_loop = true;
16111 rtx next = BB_HEAD (bb);
16114 && distance < LEA_SEARCH_THRESHOLD)
16116 if (NONDEBUG_INSN_P (next))
16120 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16121 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16122 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16123 && regno0 == DF_REF_REGNO (*use_rec))
16125 /* Return DISTANCE if OP0 is used in memory
16126 address in NEXT. */
16130 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16131 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16132 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16133 && regno0 == DF_REF_REGNO (*def_rec))
16135 /* Return -1 if OP0 is set in NEXT. */
16140 next = NEXT_INSN (next);
/* Define this macro to tune LEA priority vs. ADD; it takes effect
   when there is a choice between LEA and ADD.
   Negative value: ADD is preferred over LEA.
   Positive value: LEA is preferred over ADD.  */
16153 #define IX86_LEA_PRIORITY 2
/* Return true if it is ok to optimize an ADD operation to an LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For processors like Atom, if the
   destination register of the LEA holds an actual address that will
   be used soon, LEA is better; otherwise ADD is better.  */
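/* Editorial example, assuming the elided returns follow the upstream
   source (false when the backward distance wins, true otherwise):
   with IX86_LEA_PRIORITY 2, a non-AGU definition 3 insns back and an
   AGU use 6 insns ahead give 3 + 2 < 6, so the backward dependence
   wins and the ADD is kept; with the use only 4 insns ahead, the LEA
   form wins.  */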
16162 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16164 unsigned int regno0 = true_regnum (operands[0]);
16165 unsigned int regno1 = true_regnum (operands[1]);
16166 unsigned int regno2 = true_regnum (operands[2]);
/* If a = b + c with a != b and a != c, we must use the lea form.  */
16169 if (regno0 != regno1 && regno0 != regno2)
16172 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16176 int dist_define, dist_use;
16178 /* Return false if REGNO0 isn't used in memory address. */
16179 dist_use = distance_agu_use (regno0, insn);
16183 dist_define = distance_non_agu_define (regno1, regno2, insn);
16184 if (dist_define <= 0)
/* If this insn has both a backward non-agu dependence and a forward
   agu dependence, the one with the shorter distance takes effect.  */
16189 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
/* Return true if the destination reg of SET_BODY is the shift count
   of USE_BODY.  */
16200 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16206 /* Retrieve destination of SET_BODY. */
16207 switch (GET_CODE (set_body))
16210 set_dest = SET_DEST (set_body);
16211 if (!set_dest || !REG_P (set_dest))
16215 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16216 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16224 /* Retrieve shift count of USE_BODY. */
16225 switch (GET_CODE (use_body))
16228 shift_rtx = XEXP (use_body, 1);
16231 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16232 if (ix86_dep_by_shift_count_body (set_body,
16233 XVECEXP (use_body, 0, i)))
16241 && (GET_CODE (shift_rtx) == ASHIFT
16242 || GET_CODE (shift_rtx) == LSHIFTRT
16243 || GET_CODE (shift_rtx) == ASHIFTRT
16244 || GET_CODE (shift_rtx) == ROTATE
16245 || GET_CODE (shift_rtx) == ROTATERT))
16247 rtx shift_count = XEXP (shift_rtx, 1);
16249 /* Return true if shift count is dest of SET_BODY. */
16250 if (REG_P (shift_count)
16251 && true_regnum (set_dest) == true_regnum (shift_count))
/* Return true if the destination reg of SET_INSN is the shift count
   of USE_INSN.  */
16262 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16264 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16265 PATTERN (use_insn));
16268 /* Return TRUE or FALSE depending on whether the unary operator meets the
16269 appropriate constraints. */
16272 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16273 enum machine_mode mode ATTRIBUTE_UNUSED,
16274 rtx operands[2] ATTRIBUTE_UNUSED)
/* If one of the operands is memory, source and destination must match.  */
16277 if ((MEM_P (operands[0])
16278 || MEM_P (operands[1]))
16279 && ! rtx_equal_p (operands[0], operands[1]))
16284 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16285 are ok, keeping in mind the possible movddup alternative. */
16288 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16290 if (MEM_P (operands[0]))
16291 return rtx_equal_p (operands[0], operands[1 + high]);
16292 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16293 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16297 /* Post-reload splitter for converting an SF or DFmode value in an
16298 SSE register into an unsigned SImode. */
16301 ix86_split_convert_uns_si_sse (rtx operands[])
16303 enum machine_mode vecmode;
16304 rtx value, large, zero_or_two31, input, two31, x;
16306 large = operands[1];
16307 zero_or_two31 = operands[2];
16308 input = operands[3];
16309 two31 = operands[4];
16310 vecmode = GET_MODE (large);
16311 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16313 /* Load up the value into the low element. We must ensure that the other
16314 elements are valid floats -- zero is the easiest such value. */
16317 if (vecmode == V4SFmode)
16318 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16320 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16324 input = gen_rtx_REG (vecmode, REGNO (input));
16325 emit_move_insn (value, CONST0_RTX (vecmode));
16326 if (vecmode == V4SFmode)
16327 emit_insn (gen_sse_movss (value, value, input));
16329 emit_insn (gen_sse2_movsd (value, value, input));
16332 emit_move_insn (large, two31);
16333 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16335 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16336 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16338 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16339 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16341 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16342 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16344 large = gen_rtx_REG (V4SImode, REGNO (large));
16345 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16347 x = gen_rtx_REG (V4SImode, REGNO (value));
16348 if (vecmode == V4SFmode)
16349 emit_insn (gen_sse2_cvttps2dq (x, value));
16351 emit_insn (gen_sse2_cvttpd2dq (x, value));
16354 emit_insn (gen_xorv4si3 (value, value, large));
16357 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16358 Expects the 64-bit DImode to be supplied in a pair of integral
16359 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16360 -mfpmath=sse, !optimize_size only. */
16363 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16365 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16366 rtx int_xmm, fp_xmm;
16367 rtx biases, exponents;
16370 int_xmm = gen_reg_rtx (V4SImode);
16371 if (TARGET_INTER_UNIT_MOVES)
16372 emit_insn (gen_movdi_to_sse (int_xmm, input));
16373 else if (TARGET_SSE_SPLIT_REGS)
16375 emit_clobber (int_xmm);
16376 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16380 x = gen_reg_rtx (V2DImode);
16381 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16382 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16385 x = gen_rtx_CONST_VECTOR (V4SImode,
16386 gen_rtvec (4, GEN_INT (0x43300000UL),
16387 GEN_INT (0x45300000UL),
16388 const0_rtx, const0_rtx));
16389 exponents = validize_mem (force_const_mem (V4SImode, x));
16391 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16392 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16394 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16395 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16396 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16397 (0x1.0p84 + double(fp_value_hi_xmm)).
16398 Note these exponents differ by 32. */
16400 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16402 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16403 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16404 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16405 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16406 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16407 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16408 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16409 biases = validize_mem (force_const_mem (V2DFmode, biases));
16410 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16412 /* Add the upper and lower DFmode values together. */
16414 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16417 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16418 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16419 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16422 ix86_expand_vector_extract (false, target, fp_xmm, 0);
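/* Editorial worked example of the exponent trick: for input
   2^32 + 5, the interleave yields the doubles (2^52 + 5) and
   (2^84 + 2^32); subtracting the 2^52 and 2^84 biases leaves 5.0
   and 2^32, and the final add produces 4294967301.0 exactly.  */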
16425 /* Not used, but eases macroization of patterns. */
16427 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16428 rtx input ATTRIBUTE_UNUSED)
16430 gcc_unreachable ();
16433 /* Convert an unsigned SImode value into a DFmode. Only currently used
16434 for SSE, but applicable anywhere. */
16437 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16439 REAL_VALUE_TYPE TWO31r;
16442 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16443 NULL, 1, OPTAB_DIRECT);
16445 fp = gen_reg_rtx (DFmode);
16446 emit_insn (gen_floatsidf2 (fp, x));
16448 real_ldexp (&TWO31r, &dconst1, 31);
16449 x = const_double_from_real_value (TWO31r, DFmode);
16451 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16453 emit_move_insn (target, x);
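/* Editorial worked example: for input 0xFFFFFFFF the SImode PLUS
   wraps to 0x7FFFFFFF, floatsidf yields 2147483647.0, and adding
   TWO31 (2147483648.0) back gives 4294967295.0, the correct
   unsigned value.  */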
16456 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16457 32-bit mode; otherwise we have a direct convert instruction. */
16460 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16462 REAL_VALUE_TYPE TWO32r;
16463 rtx fp_lo, fp_hi, x;
16465 fp_lo = gen_reg_rtx (DFmode);
16466 fp_hi = gen_reg_rtx (DFmode);
16468 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16470 real_ldexp (&TWO32r, &dconst1, 32);
16471 x = const_double_from_real_value (TWO32r, DFmode);
16472 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16474 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16476 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16479 emit_move_insn (target, x);
16482 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16483 For x86_32, -mfpmath=sse, !optimize_size only. */
16485 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16487 REAL_VALUE_TYPE ONE16r;
16488 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16490 real_ldexp (&ONE16r, &dconst1, 16);
16491 x = const_double_from_real_value (ONE16r, SFmode);
16492 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16493 NULL, 0, OPTAB_DIRECT);
16494 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16495 NULL, 0, OPTAB_DIRECT);
16496 fp_hi = gen_reg_rtx (SFmode);
16497 fp_lo = gen_reg_rtx (SFmode);
16498 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16499 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16500 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16502 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16504 if (!rtx_equal_p (target, fp_hi))
16505 emit_move_insn (target, fp_hi);
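/* Editorial worked example: input 0x12345678 splits into
   hi = 0x1234 and lo = 0x5678; each half converts to SFmode
   exactly, so 4660.0 * 65536.0 + 22136.0 incurs only the single
   final rounding, giving the correctly rounded SFmode image of the
   unsigned input.  */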
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */
16513 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16520 v = gen_rtvec (4, value, value, value, value);
16521 return gen_rtx_CONST_VECTOR (V4SImode, v);
16525 v = gen_rtvec (2, value, value);
16526 return gen_rtx_CONST_VECTOR (V2DImode, v);
16530 v = gen_rtvec (8, value, value, value, value,
16531 value, value, value, value);
16533 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16534 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16535 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16536 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16537 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16541 v = gen_rtvec (4, value, value, value, value);
16543 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16544 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16545 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16549 v = gen_rtvec (4, value, value, value, value);
16551 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16552 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16553 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16557 v = gen_rtvec (2, value, value);
16559 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16560 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16563 gcc_unreachable ();
16567 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16568 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16569 for an SSE register. If VECT is true, then replicate the mask for
16570 all elements of the vector register. If INVERT is true, then create
16571 a mask excluding the sign bit. */
16574 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16576 enum machine_mode vec_mode, imode;
16577 HOST_WIDE_INT hi, lo;
16582 /* Find the sign bit, sign extended to 2*HWI. */
16589 mode = GET_MODE_INNER (mode);
16591 lo = 0x80000000, hi = lo < 0;
16598 mode = GET_MODE_INNER (mode);
16600 if (HOST_BITS_PER_WIDE_INT >= 64)
16601 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16603 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16608 vec_mode = VOIDmode;
16609 if (HOST_BITS_PER_WIDE_INT >= 64)
16612 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16619 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16623 lo = ~lo, hi = ~hi;
16629 mask = immed_double_const (lo, hi, imode);
16631 vec = gen_rtvec (2, v, mask);
16632 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16633 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16640 gcc_unreachable ();
16644 lo = ~lo, hi = ~hi;
16646 /* Force this value into the low part of a fp vector constant. */
16647 mask = immed_double_const (lo, hi, imode);
16648 mask = gen_lowpart (mode, mask);
16650 if (vec_mode == VOIDmode)
16651 return force_reg (mode, mask);
16653 v = ix86_build_const_vector (vec_mode, vect, mask);
16654 return force_reg (vec_mode, v);
16657 /* Generate code for floating point ABS or NEG. */
16660 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16663 rtx mask, set, dst, src;
16664 bool use_sse = false;
16665 bool vector_mode = VECTOR_MODE_P (mode);
16666 enum machine_mode vmode = mode;
16670 else if (mode == TFmode)
16672 else if (TARGET_SSE_MATH)
16674 use_sse = SSE_FLOAT_MODE_P (mode);
16675 if (mode == SFmode)
16677 else if (mode == DFmode)
16681 /* NEG and ABS performed with SSE use bitwise mask operations.
16682 Create the appropriate mask now. */
16684 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16691 set = gen_rtx_fmt_e (code, mode, src);
16692 set = gen_rtx_SET (VOIDmode, dst, set);
16699 use = gen_rtx_USE (VOIDmode, mask);
16701 par = gen_rtvec (2, set, use);
16704 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16705 par = gen_rtvec (3, set, use, clob);
16707 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
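/* Editorial note: with SSE this reduces to one bitwise op on the
   sign bit -- NEG becomes an XOR with the sign-bit mask and ABS an
   AND with the inverted mask -- while the USE records the mask
   operand for the later splitters.  */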
16713 /* Expand a copysign operation. Special case operand 0 being a constant. */
16716 ix86_expand_copysign (rtx operands[])
16718 enum machine_mode mode, vmode;
16719 rtx dest, op0, op1, mask, nmask;
16721 dest = operands[0];
16725 mode = GET_MODE (dest);
16727 if (mode == SFmode)
16729 else if (mode == DFmode)
16734 if (GET_CODE (op0) == CONST_DOUBLE)
16736 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16738 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16739 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16741 if (mode == SFmode || mode == DFmode)
16743 if (op0 == CONST0_RTX (mode))
16744 op0 = CONST0_RTX (vmode);
16747 rtx v = ix86_build_const_vector (vmode, false, op0);
16749 op0 = force_reg (vmode, v);
16752 else if (op0 != CONST0_RTX (mode))
16753 op0 = force_reg (mode, op0);
16755 mask = ix86_build_signbit_mask (vmode, 0, 0);
16757 if (mode == SFmode)
16758 copysign_insn = gen_copysignsf3_const;
16759 else if (mode == DFmode)
16760 copysign_insn = gen_copysigndf3_const;
16762 copysign_insn = gen_copysigntf3_const;
16764 emit_insn (copysign_insn (dest, op0, op1, mask));
16768 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16770 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16771 mask = ix86_build_signbit_mask (vmode, 0, 0);
16773 if (mode == SFmode)
16774 copysign_insn = gen_copysignsf3_var;
16775 else if (mode == DFmode)
16776 copysign_insn = gen_copysigndf3_var;
16778 copysign_insn = gen_copysigntf3_var;
16780 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
16784 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16785 be a constant, and so has already been expanded into a vector constant. */
16788 ix86_split_copysign_const (rtx operands[])
16790 enum machine_mode mode, vmode;
16791 rtx dest, op0, mask, x;
16793 dest = operands[0];
16795 mask = operands[3];
16797 mode = GET_MODE (dest);
16798 vmode = GET_MODE (mask);
16800 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16801 x = gen_rtx_AND (vmode, dest, mask);
16802 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16804 if (op0 != CONST0_RTX (vmode))
16806 x = gen_rtx_IOR (vmode, dest, op0);
16807 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16811 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16812 so we have to do two masks. */
16815 ix86_split_copysign_var (rtx operands[])
16817 enum machine_mode mode, vmode;
16818 rtx dest, scratch, op0, op1, mask, nmask, x;
16820 dest = operands[0];
16821 scratch = operands[1];
16824 nmask = operands[4];
16825 mask = operands[5];
16827 mode = GET_MODE (dest);
16828 vmode = GET_MODE (mask);
16830 if (rtx_equal_p (op0, op1))
16832 /* Shouldn't happen often (it's useless, obviously), but when it does
16833 we'd generate incorrect code if we continue below. */
16834 emit_move_insn (dest, op0);
16838 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16840 gcc_assert (REGNO (op1) == REGNO (scratch));
16842 x = gen_rtx_AND (vmode, scratch, mask);
16843 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16846 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16847 x = gen_rtx_NOT (vmode, dest);
16848 x = gen_rtx_AND (vmode, x, op0);
16849 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16853 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16855 x = gen_rtx_AND (vmode, scratch, mask);
16857 else /* alternative 2,4 */
16859 gcc_assert (REGNO (mask) == REGNO (scratch));
16860 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16861 x = gen_rtx_AND (vmode, scratch, op1);
16863 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16865 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16867 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16868 x = gen_rtx_AND (vmode, dest, nmask);
16870 else /* alternative 3,4 */
16872 gcc_assert (REGNO (nmask) == REGNO (dest));
16874 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16875 x = gen_rtx_AND (vmode, dest, op0);
16877 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16880 x = gen_rtx_IOR (vmode, dest, scratch);
16881 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
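/* Editorial summary: the net effect of the masking above is
   dest = (op0 & nmask) | (op1 & mask), i.e. the magnitude of op0
   combined with the sign bit of op1, which is exactly copysign.  */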
16884 /* Return TRUE or FALSE depending on whether the first SET in INSN
16885 has source and destination with matching CC modes, and that the
16886 CC mode is at least as constrained as REQ_MODE. */
16889 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16892 enum machine_mode set_mode;
16894 set = PATTERN (insn);
16895 if (GET_CODE (set) == PARALLEL)
16896 set = XVECEXP (set, 0, 0);
16897 gcc_assert (GET_CODE (set) == SET);
16898 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16900 set_mode = GET_MODE (SET_DEST (set));
16904 if (req_mode != CCNOmode
16905 && (req_mode != CCmode
16906 || XEXP (SET_SRC (set), 1) != const0_rtx))
16910 if (req_mode == CCGCmode)
16914 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16918 if (req_mode == CCZmode)
16928 if (set_mode != req_mode)
16933 gcc_unreachable ();
16936 return GET_MODE (SET_SRC (set)) == set_mode;
16939 /* Generate insn patterns to do an integer compare of OPERANDS. */
16942 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16944 enum machine_mode cmpmode;
16947 cmpmode = SELECT_CC_MODE (code, op0, op1);
16948 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16950 /* This is very simple, but making the interface the same as in the
16951 FP case makes the rest of the code easier. */
16952 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16953 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16955 /* Return the test that should be put into the flags user, i.e.
16956 the bcc, scc, or cmov instruction. */
16957 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
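/* Editorial example: for (GTU, %eax, %ebx) this emits
     (set (reg:CC flags) (compare:CC (reg:SI ax) (reg:SI bx)))
   and returns (gtu (reg:CC flags) (const_int 0)), ready to be
   plugged into a jump, setcc or cmov pattern by the caller.  */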
16960 /* Figure out whether to use ordered or unordered fp comparisons.
16961 Return the appropriate mode to use. */
16964 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
/* ??? In order to make all comparisons reversible, we do all
   comparisons non-trapping when compiling for IEEE.  Once gcc is able
   to distinguish all forms of trapping and nontrapping comparisons,
   we can make inequality comparisons trapping again, since it results
   in better code when using FCOM based compares.  */
16971 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16975 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16977 enum machine_mode mode = GET_MODE (op0);
16979 if (SCALAR_FLOAT_MODE_P (mode))
16981 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16982 return ix86_fp_compare_mode (code);
16987 /* Only zero flag is needed. */
16988 case EQ: /* ZF=0 */
16989 case NE: /* ZF!=0 */
16991 /* Codes needing carry flag. */
16992 case GEU: /* CF=0 */
16993 case LTU: /* CF=1 */
16994 /* Detect overflow checks. They need just the carry flag. */
16995 if (GET_CODE (op0) == PLUS
16996 && rtx_equal_p (op1, XEXP (op0, 0)))
17000 case GTU: /* CF=0 & ZF=0 */
17001 case LEU: /* CF=1 | ZF=1 */
17002 /* Detect overflow checks. They need just the carry flag. */
17003 if (GET_CODE (op0) == MINUS
17004 && rtx_equal_p (op1, XEXP (op0, 0)))
17008 /* Codes possibly doable only with sign flag when
17009 comparing against zero. */
17010 case GE: /* SF=OF or SF=0 */
17011 case LT: /* SF<>OF or SF=1 */
17012 if (op1 == const0_rtx)
17015 /* For other cases Carry flag is not required. */
/* Codes doable only with the sign flag when comparing
   against zero; but we lack a jump instruction for them,
   so we must use relational tests against the overflow
   flag, which therefore needs to be zero.  */
17021 case GT: /* ZF=0 & SF=OF */
17022 case LE: /* ZF=1 | SF<>OF */
17023 if (op1 == const0_rtx)
/* The strcmp pattern does (use flags), and combine may ask us
   for a proper mode.  */
17032 gcc_unreachable ();
17036 /* Return the fixed registers used for condition codes. */
17039 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17046 /* If two condition code modes are compatible, return a condition code
17047 mode which is compatible with both. Otherwise, return
17050 static enum machine_mode
17051 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17056 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17059 if ((m1 == CCGCmode && m2 == CCGOCmode)
17060 || (m1 == CCGOCmode && m2 == CCGCmode))
17066 gcc_unreachable ();
/* These are only compatible with themselves, which we already
   checked above.  */
/* Return a comparison we can do that is equivalent to
   swap_condition (code), except possibly for orderedness.
   But never change orderedness if TARGET_IEEE_FP; return
   UNKNOWN in that case if necessary.  */
17108 static enum rtx_code
17109 ix86_fp_swap_condition (enum rtx_code code)
17113 case GT: /* GTU - CF=0 & ZF=0 */
17114 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17115 case GE: /* GEU - CF=0 */
17116 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17117 case UNLT: /* LTU - CF=1 */
17118 return TARGET_IEEE_FP ? UNKNOWN : GT;
17119 case UNLE: /* LEU - CF=1 | ZF=1 */
17120 return TARGET_IEEE_FP ? UNKNOWN : GE;
17122 return swap_condition (code);
/* Return the cost of comparison CODE using the best strategy for
   performance.  All following functions use the number of
   instructions as a cost metric.  In the future this should be
   tweaked to compute bytes for optimize_size and to take into account
   the performance of various instructions on various CPUs.  */
17132 ix86_fp_comparison_cost (enum rtx_code code)
17136 /* The cost of code using bit-twiddling on %ah. */
17153 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17157 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17160 gcc_unreachable ();
17163 switch (ix86_fp_comparison_strategy (code))
17165 case IX86_FPCMP_COMI:
17166 return arith_cost > 4 ? 3 : 2;
17167 case IX86_FPCMP_SAHF:
17168 return arith_cost > 4 ? 4 : 3;
/* Return the strategy to use for a floating-point comparison.  We
   assume that fcomi is always preferable where available, since that
   is also true when looking at size (2 bytes, vs. 3 for fnstsw+sahf
   and at least 5 for fnstsw+test).  */
17178 enum ix86_fpcmp_strategy
17179 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17181 /* Do fcomi/sahf based test when profitable. */
17184 return IX86_FPCMP_COMI;
17186 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17187 return IX86_FPCMP_SAHF;
17189 return IX86_FPCMP_ARITH;
17192 /* Swap, force into registers, or otherwise massage the two operands
17193 to a fp comparison. The operands are updated in place; the new
17194 comparison code is returned. */
17196 static enum rtx_code
17197 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17199 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17200 rtx op0 = *pop0, op1 = *pop1;
17201 enum machine_mode op_mode = GET_MODE (op0);
17202 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
/* All of the unordered compare instructions only work on registers.
   The same is true of the fcomi compare instructions.  The XFmode
   compare instructions require registers except when comparing
   against zero or when converting operand 1 from fixed point to
   floating point.  */
17211 && (fpcmp_mode == CCFPUmode
17212 || (op_mode == XFmode
17213 && ! (standard_80387_constant_p (op0) == 1
17214 || standard_80387_constant_p (op1) == 1)
17215 && GET_CODE (op1) != FLOAT)
17216 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17218 op0 = force_reg (op_mode, op0);
17219 op1 = force_reg (op_mode, op1);
17223 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17224 things around if they appear profitable, otherwise force op0
17225 into a register. */
17227 if (standard_80387_constant_p (op0) == 0
17229 && ! (standard_80387_constant_p (op1) == 0
17232 enum rtx_code new_code = ix86_fp_swap_condition (code);
17233 if (new_code != UNKNOWN)
17236 tmp = op0, op0 = op1, op1 = tmp;
17242 op0 = force_reg (op_mode, op0);
17244 if (CONSTANT_P (op1))
17246 int tmp = standard_80387_constant_p (op1);
17248 op1 = validize_mem (force_const_mem (op_mode, op1));
17252 op1 = force_reg (op_mode, op1);
17255 op1 = force_reg (op_mode, op1);
17259 /* Try to rearrange the comparison to make it cheaper. */
17260 if (ix86_fp_comparison_cost (code)
17261 > ix86_fp_comparison_cost (swap_condition (code))
17262 && (REG_P (op1) || can_create_pseudo_p ()))
17265 tmp = op0, op0 = op1, op1 = tmp;
17266 code = swap_condition (code);
17268 op0 = force_reg (op_mode, op0);
/* Convert the comparison codes we use to represent FP comparisons
   to integer codes that will result in a proper branch.  Return
   UNKNOWN if no such code is available.  */
17281 ix86_fp_compare_code_to_integer (enum rtx_code code)
17310 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17313 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17315 enum machine_mode fpcmp_mode, intcmp_mode;
17318 fpcmp_mode = ix86_fp_compare_mode (code);
17319 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17321 /* Do fcomi/sahf based test when profitable. */
17322 switch (ix86_fp_comparison_strategy (code))
17324 case IX86_FPCMP_COMI:
17325 intcmp_mode = fpcmp_mode;
17326 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17327 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17332 case IX86_FPCMP_SAHF:
17333 intcmp_mode = fpcmp_mode;
17334 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17335 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17339 scratch = gen_reg_rtx (HImode);
17340 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17341 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17344 case IX86_FPCMP_ARITH:
17345 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17346 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17347 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17349 scratch = gen_reg_rtx (HImode);
17350 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17352 /* In the unordered case, we have to check C2 for NaN's, which
17353 doesn't happen to work out to anything nice combination-wise.
17354 So do some bit twiddling on the value we've got in AH to come
17355 up with an appropriate set of condition codes. */
17357 intcmp_mode = CCNOmode;
17362 if (code == GT || !TARGET_IEEE_FP)
17364 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17369 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17370 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17371 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17372 intcmp_mode = CCmode;
17378 if (code == LT && TARGET_IEEE_FP)
17380 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17381 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17382 intcmp_mode = CCmode;
17387 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17393 if (code == GE || !TARGET_IEEE_FP)
17395 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17400 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17401 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17407 if (code == LE && TARGET_IEEE_FP)
17409 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17410 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17411 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17412 intcmp_mode = CCmode;
17417 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17423 if (code == EQ && TARGET_IEEE_FP)
17425 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17426 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17427 intcmp_mode = CCmode;
17432 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17438 if (code == NE && TARGET_IEEE_FP)
17440 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17441 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17447 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17453 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17457 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17462 gcc_unreachable ();
17470 /* Return the test that should be put into the flags user, i.e.
17471 the bcc, scc, or cmov instruction. */
17472 return gen_rtx_fmt_ee (code, VOIDmode,
17473 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17478 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17482 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17483 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17485 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17487 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17488 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17491 ret = ix86_expand_int_compare (code, op0, op1);
17497 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17499 enum machine_mode mode = GET_MODE (op0);
17511 tmp = ix86_expand_compare (code, op0, op1);
17512 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17513 gen_rtx_LABEL_REF (VOIDmode, label),
17515 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17522 /* Expand DImode branch into multiple compare+branch. */
17524 rtx lo[2], hi[2], label2;
17525 enum rtx_code code1, code2, code3;
17526 enum machine_mode submode;
17528 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17530 tmp = op0, op0 = op1, op1 = tmp;
17531 code = swap_condition (code);
17534 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17535 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17537 submode = mode == DImode ? SImode : DImode;
17539 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17540 avoid two branches. This costs one extra insn, so disable when
17541 optimizing for size. */
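/* E.g. a 64-bit "a == b" on ia32 becomes roughly
       xorl hi(b), hi(a)
       xorl lo(b), lo(a)
       orl  hi(a), lo(a)
       jz   label
   i.e. one branch instead of two at the cost of one extra ALU insn.  */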
17543 if ((code == EQ || code == NE)
17544 && (!optimize_insn_for_size_p ()
17545 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17550 if (hi[1] != const0_rtx)
17551 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17552 NULL_RTX, 0, OPTAB_WIDEN);
17555 if (lo[1] != const0_rtx)
17556 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17557 NULL_RTX, 0, OPTAB_WIDEN);
17559 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17560 NULL_RTX, 0, OPTAB_WIDEN);
17562 ix86_expand_branch (code, tmp, const0_rtx, label);
17566 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17567 op1 is a constant and the low word is zero, then we can just
17568 examine the high word. Similarly for low word -1 and
17569 less-or-equal-than or greater-than. */
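/* E.g. an unsigned 64-bit "a < 0x500000000" has a zero low word, so
   it is equivalent to "hi (a) < 5" and needs only one compare.  */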
17571 if (CONST_INT_P (hi[1]))
17574 case LT: case LTU: case GE: case GEU:
17575 if (lo[1] == const0_rtx)
17577 ix86_expand_branch (code, hi[0], hi[1], label);
17581 case LE: case LEU: case GT: case GTU:
17582 if (lo[1] == constm1_rtx)
17584 ix86_expand_branch (code, hi[0], hi[1], label);
17592 /* Otherwise, we need two or three jumps. */
17594 label2 = gen_label_rtx ();
17597 code2 = swap_condition (code);
17598 code3 = unsigned_condition (code);
17602 case LT: case GT: case LTU: case GTU:
17605 case LE: code1 = LT; code2 = GT; break;
17606 case GE: code1 = GT; code2 = LT; break;
17607 case LEU: code1 = LTU; code2 = GTU; break;
17608 case GEU: code1 = GTU; code2 = LTU; break;
17610 case EQ: code1 = UNKNOWN; code2 = NE; break;
17611 case NE: code2 = UNKNOWN; break;
17614 gcc_unreachable ();
17619 * if (hi(a) < hi(b)) goto true;
17620 * if (hi(a) > hi(b)) goto false;
17621 * if (lo(a) < lo(b)) goto true;
17625 if (code1 != UNKNOWN)
17626 ix86_expand_branch (code1, hi[0], hi[1], label);
17627 if (code2 != UNKNOWN)
17628 ix86_expand_branch (code2, hi[0], hi[1], label2);
17630 ix86_expand_branch (code3, lo[0], lo[1], label);
17632 if (code2 != UNKNOWN)
17633 emit_label (label2);
17638 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17643 /* Split branch based on floating point condition. */
17645 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17646 rtx target1, rtx target2, rtx tmp, rtx pushed)
17651 if (target2 != pc_rtx)
17654 code = reverse_condition_maybe_unordered (code);
17659 condition = ix86_expand_fp_compare (code, op1, op2,
17662 /* Remove pushed operand from stack. */
17664 ix86_free_from_memory (GET_MODE (pushed));
17666 i = emit_jump_insn (gen_rtx_SET
17668 gen_rtx_IF_THEN_ELSE (VOIDmode,
17669 condition, target1, target2)));
17670 if (split_branch_probability >= 0)
17671 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17675 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17679 gcc_assert (GET_MODE (dest) == QImode);
17681 ret = ix86_expand_compare (code, op0, op1);
17682 PUT_MODE (ret, QImode);
17683 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17686 /* Expand comparison setting or clearing carry flag. Return true when
17687 successful and set pop for the operation. */
17689 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17691 enum machine_mode mode =
17692 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
17694 /* Do not handle double-mode compares that go through special path. */
17695 if (mode == (TARGET_64BIT ? TImode : DImode))
17698 if (SCALAR_FLOAT_MODE_P (mode))
17700 rtx compare_op, compare_seq;
17702 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17704 /* Shortcut: following common codes never translate
17705 into carry flag compares. */
17706 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17707 || code == ORDERED || code == UNORDERED)
17710 /* These comparisons require zero flag; swap operands so they won't. */
17711 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17712 && !TARGET_IEEE_FP)
17717 code = swap_condition (code);
/* Try to expand the comparison and verify that we end up with
   carry flag based comparison.  This fails to be true only when
   we decide to expand comparison using arithmetic that is not
   too common a scenario.  */
17725 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17726 compare_seq = get_insns ();
17729 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17730 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17731 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17733 code = GET_CODE (compare_op);
17735 if (code != LTU && code != GEU)
17738 emit_insn (compare_seq);
17743 if (!INTEGRAL_MODE_P (mode))
17752 /* Convert a==0 into (unsigned)a<1. */
17755 if (op1 != const0_rtx)
17758 code = (code == EQ ? LTU : GEU);
/* Convert a>b into b<a or a>=b+1.  */
17764 if (CONST_INT_P (op1))
17766 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17767 /* Bail out on overflow. We still can swap operands but that
17768 would force loading of the constant into register. */
17769 if (op1 == const0_rtx
17770 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17772 code = (code == GTU ? GEU : LTU);
17779 code = (code == GTU ? LTU : GEU);
17783 /* Convert a>=0 into (unsigned)a<0x80000000. */
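/* The sign bit sits exactly at the 0x80000000 carry boundary: a < 0
   iff (unsigned) a >= 0x80000000, and a >= 0 iff (unsigned) a
   < 0x80000000, so both directions become LTU/GEU carry compares.  */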
17786 if (mode == DImode || op1 != const0_rtx)
17788 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17789 code = (code == LT ? GEU : LTU);
17793 if (mode == DImode || op1 != constm1_rtx)
17795 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17796 code = (code == LE ? GEU : LTU);
17802 /* Swapping operands may cause constant to appear as first operand. */
17803 if (!nonimmediate_operand (op0, VOIDmode))
17805 if (!can_create_pseudo_p ())
17807 op0 = force_reg (mode, op0);
17809 *pop = ix86_expand_compare (code, op0, op1);
17810 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
17815 ix86_expand_int_movcc (rtx operands[])
17817 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17818 rtx compare_seq, compare_op;
17819 enum machine_mode mode = GET_MODE (operands[0]);
17820 bool sign_bit_compare_p = false;
17821 rtx op0 = XEXP (operands[1], 0);
17822 rtx op1 = XEXP (operands[1], 1);
17825 compare_op = ix86_expand_compare (code, op0, op1);
17826 compare_seq = get_insns ();
17829 compare_code = GET_CODE (compare_op);
17831 if ((op1 == const0_rtx && (code == GE || code == LT))
17832 || (op1 == constm1_rtx && (code == GT || code == LE)))
17833 sign_bit_compare_p = true;
17835 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17836 HImode insns, we'd be swallowed in word prefix ops. */
17838 if ((mode != HImode || TARGET_FAST_PREFIX)
17839 && (mode != (TARGET_64BIT ? TImode : DImode))
17840 && CONST_INT_P (operands[2])
17841 && CONST_INT_P (operands[3]))
17843 rtx out = operands[0];
17844 HOST_WIDE_INT ct = INTVAL (operands[2]);
17845 HOST_WIDE_INT cf = INTVAL (operands[3]);
17846 HOST_WIDE_INT diff;
/* Sign bit compares are better done using shifts than we do by using
   sbb.  */
17851 if (sign_bit_compare_p
17852 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17854 /* Detect overlap between destination and compare sources. */
17857 if (!sign_bit_compare_p)
17860 bool fpcmp = false;
17862 compare_code = GET_CODE (compare_op);
17864 flags = XEXP (compare_op, 0);
17866 if (GET_MODE (flags) == CCFPmode
17867 || GET_MODE (flags) == CCFPUmode)
17871 = ix86_fp_compare_code_to_integer (compare_code);
17874 /* To simplify rest of code, restrict to the GEU case. */
17875 if (compare_code == LTU)
17877 HOST_WIDE_INT tmp = ct;
17880 compare_code = reverse_condition (compare_code);
17881 code = reverse_condition (code);
17886 PUT_CODE (compare_op,
17887 reverse_condition_maybe_unordered
17888 (GET_CODE (compare_op)));
17890 PUT_CODE (compare_op,
17891 reverse_condition (GET_CODE (compare_op)));
17895 if (reg_overlap_mentioned_p (out, op0)
17896 || reg_overlap_mentioned_p (out, op1))
17897 tmp = gen_reg_rtx (mode);
17899 if (mode == DImode)
17900 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17902 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17903 flags, compare_op));
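/* The x86_mov{si,di}cc_0_m1 patterns expand to "sbb reg, reg", which
   produces -1 when the carry flag is set and 0 otherwise, i.e. an
   all-ones or all-zeros mask without a branch.  */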
17907 if (code == GT || code == GE)
17908 code = reverse_condition (code);
17911 HOST_WIDE_INT tmp = ct;
17916 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17929 tmp = expand_simple_binop (mode, PLUS,
17931 copy_rtx (tmp), 1, OPTAB_DIRECT);
17942 tmp = expand_simple_binop (mode, IOR,
17944 copy_rtx (tmp), 1, OPTAB_DIRECT);
17946 else if (diff == -1 && ct)
17956 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17958 tmp = expand_simple_binop (mode, PLUS,
17959 copy_rtx (tmp), GEN_INT (cf),
17960 copy_rtx (tmp), 1, OPTAB_DIRECT);
17968 * andl cf - ct, dest
17978 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17981 tmp = expand_simple_binop (mode, AND,
17983 gen_int_mode (cf - ct, mode),
17984 copy_rtx (tmp), 1, OPTAB_DIRECT);
17986 tmp = expand_simple_binop (mode, PLUS,
17987 copy_rtx (tmp), GEN_INT (ct),
17988 copy_rtx (tmp), 1, OPTAB_DIRECT);
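/* At this point tmp is -1 or 0, so the general sequence computes
   tmp = (tmp & (cf - ct)) + ct; e.g. with ct = 7 and cf = 12, a mask
   of -1 gives (5 & -1) + 7 = 12 and a mask of 0 gives 0 + 7 = 7.  */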
17991 if (!rtx_equal_p (tmp, out))
17992 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
17999 enum machine_mode cmp_mode = GET_MODE (op0);
18002 tmp = ct, ct = cf, cf = tmp;
18005 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18007 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
/* We may be reversing an unordered compare to a normal compare, which
   is not valid in general (we may convert a non-trapping condition
   to a trapping one); however, on i386 we currently emit all
   comparisons unordered.  */
18013 compare_code = reverse_condition_maybe_unordered (compare_code);
18014 code = reverse_condition_maybe_unordered (code);
18018 compare_code = reverse_condition (compare_code);
18019 code = reverse_condition (code);
18023 compare_code = UNKNOWN;
18024 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18025 && CONST_INT_P (op1))
18027 if (op1 == const0_rtx
18028 && (code == LT || code == GE))
18029 compare_code = code;
18030 else if (op1 == constm1_rtx)
18034 else if (code == GT)
18039 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18040 if (compare_code != UNKNOWN
18041 && GET_MODE (op0) == GET_MODE (out)
18042 && (cf == -1 || ct == -1))
18044 /* If lea code below could be used, only optimize
18045 if it results in a 2 insn sequence. */
18047 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18048 || diff == 3 || diff == 5 || diff == 9)
18049 || (compare_code == LT && ct == -1)
18050 || (compare_code == GE && cf == -1))
18053 * notl op1 (if necessary)
18061 code = reverse_condition (code);
18064 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18066 out = expand_simple_binop (mode, IOR,
18068 out, 1, OPTAB_DIRECT);
18069 if (out != operands[0])
18070 emit_move_insn (operands[0], out);
18077 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18078 || diff == 3 || diff == 5 || diff == 9)
18079 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18081 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18087 * lea cf(dest*(ct-cf)),dest
18091 * This also catches the degenerate setcc-only case.
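/* The setcc result is 0 or 1, so dest = cf + dest * (ct - cf) maps
   0 -> cf and 1 -> ct.  A single lea can form base + index * {1,2,4,8}
   and may reuse the index as the base, hence diffs of 1, 2, 3, 4, 5,
   8 and 9 are the ones accepted above.  */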
18097 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
/* On x86_64 the lea instruction operates on Pmode, so we need
   to get arithmetic done in the proper mode to match.  */
18103 tmp = copy_rtx (out);
18107 out1 = copy_rtx (out);
18108 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18112 tmp = gen_rtx_PLUS (mode, tmp, out1);
18118 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18121 if (!rtx_equal_p (tmp, out))
18124 out = force_operand (tmp, copy_rtx (out));
18126 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18128 if (!rtx_equal_p (out, operands[0]))
18129 emit_move_insn (operands[0], copy_rtx (out));
18135 * General case: Jumpful:
18136 * xorl dest,dest cmpl op1, op2
18137 * cmpl op1, op2 movl ct, dest
18138 * setcc dest jcc 1f
18139 * decl dest movl cf, dest
18140 * andl (cf-ct),dest 1:
18143 * Size 20. Size 14.
18145 * This is reasonably steep, but branch mispredict costs are
18146 * high on modern cpus, so consider failing only if optimizing
18150 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18151 && BRANCH_COST (optimize_insn_for_speed_p (),
18156 enum machine_mode cmp_mode = GET_MODE (op0);
18161 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18163 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
/* We may be reversing an unordered compare to a normal compare,
   which is not valid in general (we may convert a non-trapping
   condition to a trapping one); however, on i386 we currently
   emit all comparisons unordered.  */
18169 code = reverse_condition_maybe_unordered (code);
18173 code = reverse_condition (code);
18174 if (compare_code != UNKNOWN)
18175 compare_code = reverse_condition (compare_code);
18179 if (compare_code != UNKNOWN)
18181 /* notl op1 (if needed)
18186 For x < 0 (resp. x <= -1) there will be no notl,
so if possible swap the constants to get rid of the
complement.
True/false will be -1/0 while code below (store flag
18190 followed by decrement) is 0/-1, so the constants need
18191 to be exchanged once more. */
18193 if (compare_code == GE || !cf)
18195 code = reverse_condition (code);
18200 HOST_WIDE_INT tmp = cf;
18205 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18209 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18211 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18213 copy_rtx (out), 1, OPTAB_DIRECT);
18216 out = expand_simple_binop (mode, AND, copy_rtx (out),
18217 gen_int_mode (cf - ct, mode),
18218 copy_rtx (out), 1, OPTAB_DIRECT);
18220 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18221 copy_rtx (out), 1, OPTAB_DIRECT);
18222 if (!rtx_equal_p (out, operands[0]))
18223 emit_move_insn (operands[0], copy_rtx (out));
18229 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18231 /* Try a few things more with specific constants and a variable. */
18234 rtx var, orig_out, out, tmp;
18236 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18239 /* If one of the two operands is an interesting constant, load a
18240 constant with the above and mask it in with a logical operation. */
18242 if (CONST_INT_P (operands[2]))
18245 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18246 operands[3] = constm1_rtx, op = and_optab;
18247 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18248 operands[3] = const0_rtx, op = ior_optab;
18252 else if (CONST_INT_P (operands[3]))
18255 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18256 operands[2] = constm1_rtx, op = and_optab;
else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18258 operands[2] = const0_rtx, op = ior_optab;
18265 orig_out = operands[0];
18266 tmp = gen_reg_rtx (mode);
18269 /* Recurse to get the constant loaded. */
18270 if (ix86_expand_int_movcc (operands) == 0)
18273 /* Mask in the interesting variable. */
18274 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18276 if (!rtx_equal_p (out, orig_out))
18277 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18283 * For comparison with above,
18293 if (! nonimmediate_operand (operands[2], mode))
18294 operands[2] = force_reg (mode, operands[2]);
18295 if (! nonimmediate_operand (operands[3], mode))
18296 operands[3] = force_reg (mode, operands[3]);
18298 if (! register_operand (operands[2], VOIDmode)
18300 || ! register_operand (operands[3], VOIDmode)))
18301 operands[2] = force_reg (mode, operands[2]);
18304 && ! register_operand (operands[3], VOIDmode))
18305 operands[3] = force_reg (mode, operands[3]);
18307 emit_insn (compare_seq);
18308 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18309 gen_rtx_IF_THEN_ELSE (mode,
18310 compare_op, operands[2],
18315 /* Swap, force into registers, or otherwise massage the two operands
18316 to an sse comparison with a mask result. Thus we differ a bit from
18317 ix86_prepare_fp_compare_args which expects to produce a flags result.
18319 The DEST operand exists to help determine whether to commute commutative
18320 operators. The POP0/POP1 operands are updated in place. The new
18321 comparison code is returned, or UNKNOWN if not implementable. */
18323 static enum rtx_code
18324 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18325 rtx *pop0, rtx *pop1)
18333 /* We have no LTGT as an operator. We could implement it with
18334 NE & ORDERED, but this requires an extra temporary. It's
18335 not clear that it's worth it. */
18342 /* These are supported directly. */
18349 /* For commutative operators, try to canonicalize the destination
18350 operand to be first in the comparison - this helps reload to
18351 avoid extra moves. */
18352 if (!dest || !rtx_equal_p (dest, *pop1))
18360 /* These are not supported directly. Swap the comparison operands
18361 to transform into something that is supported. */
18365 code = swap_condition (code);
18369 gcc_unreachable ();
18375 /* Detect conditional moves that exactly match min/max operational
18376 semantics. Note that this is IEEE safe, as long as we don't
18377 interchange the operands.
18379 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18380 and TRUE if the operation is successful and instructions are emitted. */
18383 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18384 rtx cmp_op1, rtx if_true, rtx if_false)
18386 enum machine_mode mode;
18392 else if (code == UNGE)
18395 if_true = if_false;
18401 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18403 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18408 mode = GET_MODE (dest);
18410 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18411 but MODE may be a vector mode and thus not appropriate. */
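/* SSE min/max instructions are not IEEE symmetric; e.g. minss returns
   its second operand when the operands compare unordered, so when NaNs
   or signed zeros may matter we must preserve the operand order and
   use the UNSPEC form below instead of plain SMIN/SMAX.  */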
18412 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18414 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18417 if_true = force_reg (mode, if_true);
18418 v = gen_rtvec (2, if_true, if_false);
18419 tmp = gen_rtx_UNSPEC (mode, v, u);
18423 code = is_min ? SMIN : SMAX;
18424 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18427 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18431 /* Expand an sse vector comparison. Return the register with the result. */
18434 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18435 rtx op_true, rtx op_false)
18437 enum machine_mode mode = GET_MODE (dest);
18440 cmp_op0 = force_reg (mode, cmp_op0);
18441 if (!nonimmediate_operand (cmp_op1, mode))
18442 cmp_op1 = force_reg (mode, cmp_op1);
18445 || reg_overlap_mentioned_p (dest, op_true)
18446 || reg_overlap_mentioned_p (dest, op_false))
18447 dest = gen_reg_rtx (mode);
18449 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18450 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18455 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18456 operations. This is used for both scalar and vector conditional moves. */
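/* SSE compares produce an all-ones/all-zeros mask per element, so the
   general expansion below computes
       dest = (cmp & op_true) | (~cmp & op_false)
   with shorter single-AND forms when one arm is known to be zero.  */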
18459 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18461 enum machine_mode mode = GET_MODE (dest);
18464 if (op_false == CONST0_RTX (mode))
18466 op_true = force_reg (mode, op_true);
18467 x = gen_rtx_AND (mode, cmp, op_true);
18468 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18470 else if (op_true == CONST0_RTX (mode))
18472 op_false = force_reg (mode, op_false);
18473 x = gen_rtx_NOT (mode, cmp);
18474 x = gen_rtx_AND (mode, x, op_false);
18475 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18477 else if (TARGET_XOP)
18479 rtx pcmov = gen_rtx_SET (mode, dest,
18480 gen_rtx_IF_THEN_ELSE (mode, cmp,
18487 op_true = force_reg (mode, op_true);
18488 op_false = force_reg (mode, op_false);
18490 t2 = gen_reg_rtx (mode);
18492 t3 = gen_reg_rtx (mode);
18496 x = gen_rtx_AND (mode, op_true, cmp);
18497 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18499 x = gen_rtx_NOT (mode, cmp);
18500 x = gen_rtx_AND (mode, x, op_false);
18501 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18503 x = gen_rtx_IOR (mode, t3, t2);
18504 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18508 /* Expand a floating-point conditional move. Return true if successful. */
18511 ix86_expand_fp_movcc (rtx operands[])
18513 enum machine_mode mode = GET_MODE (operands[0]);
18514 enum rtx_code code = GET_CODE (operands[1]);
18515 rtx tmp, compare_op;
18516 rtx op0 = XEXP (operands[1], 0);
18517 rtx op1 = XEXP (operands[1], 1);
18519 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18521 enum machine_mode cmode;
18523 /* Since we've no cmove for sse registers, don't force bad register
18524 allocation just to gain access to it. Deny movcc when the
18525 comparison mode doesn't match the move mode. */
18526 cmode = GET_MODE (op0);
18527 if (cmode == VOIDmode)
18528 cmode = GET_MODE (op1);
18532 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18533 if (code == UNKNOWN)
18536 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18537 operands[2], operands[3]))
18540 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18541 operands[2], operands[3]);
18542 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18546 /* The floating point conditional move instructions don't directly
18547 support conditions resulting from a signed integer comparison. */
18549 compare_op = ix86_expand_compare (code, op0, op1);
18550 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18552 tmp = gen_reg_rtx (QImode);
18553 ix86_expand_setcc (tmp, code, op0, op1);
18555 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18558 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18559 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18560 operands[2], operands[3])));
18565 /* Expand a floating-point vector conditional move; a vcond operation
18566 rather than a movcc operation. */
18569 ix86_expand_fp_vcond (rtx operands[])
18571 enum rtx_code code = GET_CODE (operands[3]);
18574 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18575 &operands[4], &operands[5]);
18576 if (code == UNKNOWN)
18579 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18580 operands[5], operands[1], operands[2]))
18583 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18584 operands[1], operands[2]);
18585 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18589 /* Expand a signed/unsigned integral vector conditional move. */
18592 ix86_expand_int_vcond (rtx operands[])
18594 enum machine_mode mode = GET_MODE (operands[0]);
18595 enum rtx_code code = GET_CODE (operands[3]);
18596 bool negate = false;
18599 cop0 = operands[4];
18600 cop1 = operands[5];
18602 /* XOP supports all of the comparisons on all vector int types. */
18605 /* Canonicalize the comparison to EQ, GT, GTU. */
18616 code = reverse_condition (code);
18622 code = reverse_condition (code);
18628 code = swap_condition (code);
18629 x = cop0, cop0 = cop1, cop1 = x;
18633 gcc_unreachable ();
18636 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18637 if (mode == V2DImode)
18642 /* SSE4.1 supports EQ. */
18643 if (!TARGET_SSE4_1)
18649 /* SSE4.2 supports GT/GTU. */
18650 if (!TARGET_SSE4_2)
18655 gcc_unreachable ();
/* Unsigned parallel compare is not supported by the hardware.
   Play some tricks to turn this into a signed comparison
   against 0.  */
18664 cop0 = force_reg (mode, cop0);
18672 rtx (*gen_sub3) (rtx, rtx, rtx);
/* Subtract (-(INT MAX) - 1) from both operands to make
   them signed.  */
18676 mask = ix86_build_signbit_mask (mode, true, false);
18677 gen_sub3 = (mode == V4SImode
18678 ? gen_subv4si3 : gen_subv2di3);
18679 t1 = gen_reg_rtx (mode);
18680 emit_insn (gen_sub3 (t1, cop0, mask));
18682 t2 = gen_reg_rtx (mode);
18683 emit_insn (gen_sub3 (t2, cop1, mask));
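/* Subtracting 0x80...0 just flips the sign bit (the subtrahend's lower
   bits are zero), and a <u b iff (a ^ signbit) <s (b ^ signbit); e.g.
   for bytes, 0xff >u 0x01 matches 0x7f >s 0x81 (127 > -127).  */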
18693 /* Perform a parallel unsigned saturating subtraction. */
18694 x = gen_reg_rtx (mode);
18695 emit_insn (gen_rtx_SET (VOIDmode, x,
18696 gen_rtx_US_MINUS (mode, cop0, cop1)));
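/* The unsigned saturating difference is zero exactly when
   cop0 <=u cop1, so comparing the result for equality with zero
   (with the sense negated) implements the unsigned compare.  */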
18699 cop1 = CONST0_RTX (mode);
18705 gcc_unreachable ();
18710 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18711 operands[1+negate], operands[2-negate]);
18713 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18714 operands[2-negate]);
18718 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18719 true if we should do zero extension, else sign extension. HIGH_P is
18720 true if we want the N/2 high elements, else the low elements. */
18723 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18725 enum machine_mode imode = GET_MODE (operands[1]);
18730 rtx (*unpack)(rtx, rtx);
18736 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18738 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18742 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18744 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18748 unpack = gen_sse4_1_zero_extendv2siv2di2;
18750 unpack = gen_sse4_1_sign_extendv2siv2di2;
18753 gcc_unreachable ();
18758 /* Shift higher 8 bytes to lower 8 bytes. */
18759 tmp = gen_reg_rtx (imode);
18760 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
18761 gen_lowpart (V1TImode, operands[1]),
18767 emit_insn (unpack (operands[0], tmp));
18771 rtx (*unpack)(rtx, rtx, rtx);
18777 unpack = gen_vec_interleave_highv16qi;
18779 unpack = gen_vec_interleave_lowv16qi;
18783 unpack = gen_vec_interleave_highv8hi;
18785 unpack = gen_vec_interleave_lowv8hi;
18789 unpack = gen_vec_interleave_highv4si;
18791 unpack = gen_vec_interleave_lowv4si;
18794 gcc_unreachable ();
18797 dest = gen_lowpart (imode, operands[0]);
18800 tmp = force_reg (imode, CONST0_RTX (imode));
18802 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18803 operands[1], pc_rtx, pc_rtx);
18805 emit_insn (unpack (dest, operands[1], tmp));
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
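/* E.g. "x += (a < b)" can be emitted as an unsigned compare followed
   by "adc $0, x", since the compare leaves its result in the carry
   flag, which adc/sbb folds directly into the addition.  */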
18813 ix86_expand_int_addcc (rtx operands[])
18815 enum rtx_code code = GET_CODE (operands[1]);
18817 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18819 rtx val = const0_rtx;
18820 bool fpcmp = false;
18821 enum machine_mode mode;
18822 rtx op0 = XEXP (operands[1], 0);
18823 rtx op1 = XEXP (operands[1], 1);
18825 if (operands[3] != const1_rtx
18826 && operands[3] != constm1_rtx)
18828 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18830 code = GET_CODE (compare_op);
18832 flags = XEXP (compare_op, 0);
18834 if (GET_MODE (flags) == CCFPmode
18835 || GET_MODE (flags) == CCFPUmode)
18838 code = ix86_fp_compare_code_to_integer (code);
18845 PUT_CODE (compare_op,
18846 reverse_condition_maybe_unordered
18847 (GET_CODE (compare_op)));
18849 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18852 mode = GET_MODE (operands[0]);
18854 /* Construct either adc or sbb insn. */
18855 if ((code == LTU) == (operands[3] == constm1_rtx))
18860 insn = gen_subqi3_carry;
18863 insn = gen_subhi3_carry;
18866 insn = gen_subsi3_carry;
18869 insn = gen_subdi3_carry;
18872 gcc_unreachable ();
18880 insn = gen_addqi3_carry;
18883 insn = gen_addhi3_carry;
18886 insn = gen_addsi3_carry;
18889 insn = gen_adddi3_carry;
18892 gcc_unreachable ();
18895 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating-point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally four parts are generated.  */
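/* E.g. on ia32 a DFmode value yields two SImode parts, an XFmode value
   three, and a TFmode value four; hence the size assert below.  */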
18907 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18914 size = (GET_MODE_SIZE (mode) + 4) / 8;
18916 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18917 gcc_assert (size >= 2 && size <= 4);
/* Optimize constant pool reference to immediates.  This is used by fp
   moves, which force all constants to memory to allow combining.  */
18921 if (MEM_P (operand) && MEM_READONLY_P (operand))
18923 rtx tmp = maybe_get_pool_constant (operand);
18928 if (MEM_P (operand) && !offsettable_memref_p (operand))
/* The only non-offsettable memories we handle are pushes.  */
18931 int ok = push_operand (operand, VOIDmode);
18935 operand = copy_rtx (operand);
18936 PUT_MODE (operand, Pmode);
18937 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18941 if (GET_CODE (operand) == CONST_VECTOR)
18943 enum machine_mode imode = int_mode_for_mode (mode);
18944 /* Caution: if we looked through a constant pool memory above,
18945 the operand may actually have a different mode now. That's
18946 ok, since we want to pun this all the way back to an integer. */
18947 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18948 gcc_assert (operand != NULL);
18954 if (mode == DImode)
18955 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18960 if (REG_P (operand))
18962 gcc_assert (reload_completed);
18963 for (i = 0; i < size; i++)
18964 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18966 else if (offsettable_memref_p (operand))
18968 operand = adjust_address (operand, SImode, 0);
18969 parts[0] = operand;
18970 for (i = 1; i < size; i++)
18971 parts[i] = adjust_address (operand, SImode, 4 * i);
18973 else if (GET_CODE (operand) == CONST_DOUBLE)
18978 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18982 real_to_target (l, &r, mode);
18983 parts[3] = gen_int_mode (l[3], SImode);
18984 parts[2] = gen_int_mode (l[2], SImode);
18987 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18988 parts[2] = gen_int_mode (l[2], SImode);
18991 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18994 gcc_unreachable ();
18996 parts[1] = gen_int_mode (l[1], SImode);
18997 parts[0] = gen_int_mode (l[0], SImode);
19000 gcc_unreachable ();
19005 if (mode == TImode)
19006 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19007 if (mode == XFmode || mode == TFmode)
enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
19010 if (REG_P (operand))
19012 gcc_assert (reload_completed);
19013 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19014 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19016 else if (offsettable_memref_p (operand))
19018 operand = adjust_address (operand, DImode, 0);
19019 parts[0] = operand;
19020 parts[1] = adjust_address (operand, upper_mode, 8);
19022 else if (GET_CODE (operand) == CONST_DOUBLE)
19027 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19028 real_to_target (l, &r, mode);
19030 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19031 if (HOST_BITS_PER_WIDE_INT >= 64)
19034 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19035 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19038 parts[0] = immed_double_const (l[0], l[1], DImode);
19040 if (upper_mode == SImode)
19041 parts[1] = gen_int_mode (l[2], SImode);
19042 else if (HOST_BITS_PER_WIDE_INT >= 64)
19045 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19046 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19049 parts[1] = immed_double_const (l[2], l[3], DImode);
19052 gcc_unreachable ();
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */
19065 ix86_split_long_move (rtx operands[])
19070 int collisions = 0;
19071 enum machine_mode mode = GET_MODE (operands[0]);
19072 bool collisionparts[4];
/* The DFmode expanders may ask us to move a double.
   For a 64bit target this is a single move.  By hiding the fact
   here we simplify i386.md splitters.  */
19077 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
/* Optimize constant pool reference to immediates.  This is used by
   fp moves, which force all constants to memory to allow combining.  */
19082 if (MEM_P (operands[1])
19083 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19084 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19085 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19086 if (push_operand (operands[0], VOIDmode))
19088 operands[0] = copy_rtx (operands[0]);
19089 PUT_MODE (operands[0], Pmode);
19092 operands[0] = gen_lowpart (DImode, operands[0]);
19093 operands[1] = gen_lowpart (DImode, operands[1]);
19094 emit_move_insn (operands[0], operands[1]);
19098 /* The only non-offsettable memory we handle is push. */
19099 if (push_operand (operands[0], VOIDmode))
19102 gcc_assert (!MEM_P (operands[0])
19103 || offsettable_memref_p (operands[0]));
19105 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19106 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19108 /* When emitting push, take care for source operands on the stack. */
19109 if (push && MEM_P (operands[1])
19110 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19112 rtx src_base = XEXP (part[1][nparts - 1], 0);
19114 /* Compensate for the stack decrement by 4. */
19115 if (!TARGET_64BIT && nparts == 3
19116 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19117 src_base = plus_constant (src_base, 4);
19119 /* src_base refers to the stack pointer and is
19120 automatically decreased by emitted push. */
19121 for (i = 0; i < nparts; i++)
19122 part[1][i] = change_address (part[1][i],
19123 GET_MODE (part[1][i]), src_base);
/* We need to do the copy in the right order in case an address register
   of the source overlaps the destination.  */
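/* E.g. when copying memory addressed by %eax into the %eax:%edx pair,
   moving the low word first would clobber %eax while it is still
   needed to address the remaining words.  */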
19128 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19132 for (i = 0; i < nparts; i++)
19135 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19136 if (collisionparts[i])
19140 /* Collision in the middle part can be handled by reordering. */
19141 if (collisions == 1 && nparts == 3 && collisionparts [1])
19143 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19144 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19146 else if (collisions == 1
19148 && (collisionparts [1] || collisionparts [2]))
19150 if (collisionparts [1])
19152 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19153 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19157 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19158 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19162 /* If there are more collisions, we can't handle it by reordering.
19163 Do an lea to the last part and use only one colliding move. */
19164 else if (collisions > 1)
19170 base = part[0][nparts - 1];
19172 /* Handle the case when the last part isn't valid for lea.
19173 Happens in 64-bit mode storing the 12-byte XFmode. */
19174 if (GET_MODE (base) != Pmode)
19175 base = gen_rtx_REG (Pmode, REGNO (base));
19177 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19178 part[1][0] = replace_equiv_address (part[1][0], base);
19179 for (i = 1; i < nparts; i++)
19181 tmp = plus_constant (base, UNITS_PER_WORD * i);
19182 part[1][i] = replace_equiv_address (part[1][i], tmp);
19193 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19194 emit_insn (gen_addsi3 (stack_pointer_rtx,
19195 stack_pointer_rtx, GEN_INT (-4)));
19196 emit_move_insn (part[0][2], part[1][2]);
19198 else if (nparts == 4)
19200 emit_move_insn (part[0][3], part[1][3]);
19201 emit_move_insn (part[0][2], part[1][2]);
/* In 64bit mode we don't have 32bit push available.  In case this is
   a register, it is OK - we will just use the larger counterpart.  We also
   retype memory - these come from an attempt to avoid the REX prefix on
   moving the second half of a TFmode value.  */
19210 if (GET_MODE (part[1][1]) == SImode)
19212 switch (GET_CODE (part[1][1]))
19215 part[1][1] = adjust_address (part[1][1], DImode, 0);
19219 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19223 gcc_unreachable ();
19226 if (GET_MODE (part[1][0]) == SImode)
19227 part[1][0] = part[1][1];
19230 emit_move_insn (part[0][1], part[1][1]);
19231 emit_move_insn (part[0][0], part[1][0]);
19235 /* Choose correct order to not overwrite the source before it is copied. */
19236 if ((REG_P (part[0][0])
19237 && REG_P (part[1][1])
19238 && (REGNO (part[0][0]) == REGNO (part[1][1])
19240 && REGNO (part[0][0]) == REGNO (part[1][2]))
19242 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19244 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19246 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19248 operands[2 + i] = part[0][j];
19249 operands[6 + i] = part[1][j];
19254 for (i = 0; i < nparts; i++)
19256 operands[2 + i] = part[0][i];
19257 operands[6 + i] = part[1][i];
19261 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19262 if (optimize_insn_for_size_p ())
19264 for (j = 0; j < nparts - 1; j++)
19265 if (CONST_INT_P (operands[6 + j])
19266 && operands[6 + j] != const0_rtx
19267 && REG_P (operands[2 + j]))
19268 for (i = j; i < nparts - 1; i++)
19269 if (CONST_INT_P (operands[7 + i])
19270 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19271 operands[7 + i] = operands[2 + j];
19274 for (i = 0; i < nparts; i++)
19275 emit_move_insn (operands[2 + i], operands[6 + i]);
19280 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19281 left shift by a constant, either using a single shift or
19282 a sequence of add instructions. */
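/* "add reg, reg" doubles a value, so N such adds shift left by N; the
   cost check below prefers them when count * add cost does not exceed
   the cost of a single shift by a constant.  */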
19285 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19287 rtx (*insn)(rtx, rtx, rtx);
19290 || (count * ix86_cost->add <= ix86_cost->shift_const
19291 && !optimize_insn_for_size_p ()))
19293 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19294 while (count-- > 0)
19295 emit_insn (insn (operand, operand, operand));
19299 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19300 emit_insn (insn (operand, operand, GEN_INT (count)));
19305 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19307 rtx (*gen_ashl3)(rtx, rtx, rtx);
19308 rtx (*gen_shld)(rtx, rtx, rtx);
19309 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19311 rtx low[2], high[2];
19314 if (CONST_INT_P (operands[2]))
19316 split_double_mode (mode, operands, 2, low, high);
19317 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19319 if (count >= half_width)
19321 emit_move_insn (high[0], low[1]);
19322 emit_move_insn (low[0], const0_rtx);
19324 if (count > half_width)
19325 ix86_expand_ashl_const (high[0], count - half_width, mode);
19329 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19331 if (!rtx_equal_p (operands[0], operands[1]))
19332 emit_move_insn (operands[0], operands[1]);
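/* shld shifts the high half left by COUNT while filling its low-order
   bits from the top bits of the low half; the low half itself is then
   shifted separately below.  */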
19334 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19335 ix86_expand_ashl_const (low[0], count, mode);
19340 split_double_mode (mode, operands, 1, low, high);
19342 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19344 if (operands[1] == const1_rtx)
/* Assuming we've chosen QImode-capable registers, then 1 << N
   can be done with two 32/64-bit shifts, no branches, no cmoves.  */
19348 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19350 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19352 ix86_expand_clear (low[0]);
19353 ix86_expand_clear (high[0]);
19354 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19356 d = gen_lowpart (QImode, low[0]);
19357 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19358 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19359 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19361 d = gen_lowpart (QImode, high[0]);
19362 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19363 s = gen_rtx_NE (QImode, flags, const0_rtx);
19364 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19367 /* Otherwise, we can get the same results by manually performing
19368 a bit extract operation on bit 5/6, and then performing the two
19369 shifts. The two methods of getting 0/1 into low/high are exactly
19370 the same size. Avoiding the shift in the bit extract case helps
19371 pentium4 a bit; no one else seems to care much either way. */
19374 enum machine_mode half_mode;
19375 rtx (*gen_lshr3)(rtx, rtx, rtx);
19376 rtx (*gen_and3)(rtx, rtx, rtx);
19377 rtx (*gen_xor3)(rtx, rtx, rtx);
19378 HOST_WIDE_INT bits;
19381 if (mode == DImode)
19383 half_mode = SImode;
19384 gen_lshr3 = gen_lshrsi3;
19385 gen_and3 = gen_andsi3;
19386 gen_xor3 = gen_xorsi3;
19391 half_mode = DImode;
19392 gen_lshr3 = gen_lshrdi3;
19393 gen_and3 = gen_anddi3;
19394 gen_xor3 = gen_xordi3;
19398 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19399 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19401 x = gen_lowpart (half_mode, operands[2]);
19402 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19404 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19405 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19406 emit_move_insn (low[0], high[0]);
19407 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19410 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19411 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19415 if (operands[1] == constm1_rtx)
19417 /* For -1 << N, we can avoid the shld instruction, because we
19418 know that we're shifting 0...31/63 ones into a -1. */
19419 emit_move_insn (low[0], constm1_rtx);
19420 if (optimize_insn_for_size_p ())
19421 emit_move_insn (high[0], low[0]);
19423 emit_move_insn (high[0], constm1_rtx);
19427 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19429 if (!rtx_equal_p (operands[0], operands[1]))
19430 emit_move_insn (operands[0], operands[1]);
19432 split_double_mode (mode, operands, 1, low, high);
19433 emit_insn (gen_shld (high[0], low[0], operands[2]));
19436 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19438 if (TARGET_CMOVE && scratch)
19440 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19441 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19443 ix86_expand_clear (scratch);
19444 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19448 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19449 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19451 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19456 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19458 rtx (*gen_ashr3)(rtx, rtx, rtx)
19459 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19460 rtx (*gen_shrd)(rtx, rtx, rtx);
19461 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19463 rtx low[2], high[2];
19466 if (CONST_INT_P (operands[2]))
19468 split_double_mode (mode, operands, 2, low, high);
19469 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
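/* An arithmetic right shift by the full width minus one replicates
   the sign bit through every bit, so both result halves can be taken
   from high >> (half_width - 1).  */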
19471 if (count == GET_MODE_BITSIZE (mode) - 1)
19473 emit_move_insn (high[0], high[1]);
19474 emit_insn (gen_ashr3 (high[0], high[0],
19475 GEN_INT (half_width - 1)));
19476 emit_move_insn (low[0], high[0]);
19479 else if (count >= half_width)
19481 emit_move_insn (low[0], high[1]);
19482 emit_move_insn (high[0], low[0]);
19483 emit_insn (gen_ashr3 (high[0], high[0],
19484 GEN_INT (half_width - 1)));
19486 if (count > half_width)
19487 emit_insn (gen_ashr3 (low[0], low[0],
19488 GEN_INT (count - half_width)));
19492 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19494 if (!rtx_equal_p (operands[0], operands[1]))
19495 emit_move_insn (operands[0], operands[1]);
19497 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19498 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19503 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19505 if (!rtx_equal_p (operands[0], operands[1]))
19506 emit_move_insn (operands[0], operands[1]);
19508 split_double_mode (mode, operands, 1, low, high);
19510 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19511 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19513 if (TARGET_CMOVE && scratch)
19515 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19516 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19518 emit_move_insn (scratch, high[0]);
19519 emit_insn (gen_ashr3 (scratch, scratch,
19520 GEN_INT (half_width - 1)));
19521 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19526 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19527 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19529 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19535 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19537 rtx (*gen_lshr3)(rtx, rtx, rtx)
19538 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19539 rtx (*gen_shrd)(rtx, rtx, rtx);
19540 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19542 rtx low[2], high[2];
19545 if (CONST_INT_P (operands[2]))
19547 split_double_mode (mode, operands, 2, low, high);
19548 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19550 if (count >= half_width)
19552 emit_move_insn (low[0], high[1]);
19553 ix86_expand_clear (high[0]);
19555 if (count > half_width)
19556 emit_insn (gen_lshr3 (low[0], low[0],
19557 GEN_INT (count - half_width)));
19561 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19563 if (!rtx_equal_p (operands[0], operands[1]))
19564 emit_move_insn (operands[0], operands[1]);
19566 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19567 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19572 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19574 if (!rtx_equal_p (operands[0], operands[1]))
19575 emit_move_insn (operands[0], operands[1]);
19577 split_double_mode (mode, operands, 1, low, high);
19579 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19580 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19582 if (TARGET_CMOVE && scratch)
19584 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19585 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19587 ix86_expand_clear (scratch);
19588 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19593 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19594 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19596 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19601 /* Predict just emitted jump instruction to be taken with probability PROB. */
19603 predict_jump (int prob)
19605 rtx insn = get_last_insn ();
19606 gcc_assert (JUMP_P (insn));
19607 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If true, jump to the label.  */
19613 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19615 rtx label = gen_label_rtx ();
19616 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19617 if (GET_MODE (variable) == DImode)
19618 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19620 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19621 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19624 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19626 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19630 /* Adjust COUNTER by the VALUE. */
19632 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19634 rtx (*gen_add)(rtx, rtx, rtx)
19635 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19637 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19640 /* Zero extend possibly SImode EXP to Pmode register. */
19642 ix86_zero_extend_to_Pmode (rtx exp)
19645 if (GET_MODE (exp) == VOIDmode)
19646 return force_reg (Pmode, exp);
19647 if (GET_MODE (exp) == Pmode)
19648 return copy_to_mode_reg (Pmode, exp);
19649 r = gen_reg_rtx (Pmode);
19650 emit_insn (gen_zero_extendsidi2 (r, exp));
19654 /* Divide COUNTREG by SCALE. */
19656 scale_counter (rtx countreg, int scale)
19662 if (CONST_INT_P (countreg))
19663 return GEN_INT (INTVAL (countreg) / scale);
19664 gcc_assert (REG_P (countreg));
19666 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19667 GEN_INT (exact_log2 (scale)),
19668 NULL, 1, OPTAB_DIRECT);
19672 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19673 DImode for constant loop counts. */
19675 static enum machine_mode
19676 counter_mode (rtx count_exp)
19678 if (GET_MODE (count_exp) != VOIDmode)
19679 return GET_MODE (count_exp);
19680 if (!CONST_INT_P (count_exp))
19682 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
/* When SRCPTR is non-NULL, output a simple loop to move memory from
   pointer SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
   the overall size is COUNT specified in bytes.  When SRCPTR is NULL,
   output the equivalent loop to set memory by VALUE (supposed to be
   in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.  */
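/* E.g. with MODE = SImode and UNROLL = 4, each iteration moves
   16 bytes and the rounding is simply "count & ~15" via
   piece_size_mask below.  */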
19697 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19698 rtx destptr, rtx srcptr, rtx value,
19699 rtx count, enum machine_mode mode, int unroll,
19702 rtx out_label, top_label, iter, tmp;
19703 enum machine_mode iter_mode = counter_mode (count);
19704 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19705 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19711 top_label = gen_label_rtx ();
19712 out_label = gen_label_rtx ();
19713 iter = gen_reg_rtx (iter_mode);
19715 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19716 NULL, 1, OPTAB_DIRECT);
19717 /* Those two should combine. */
19718 if (piece_size == const1_rtx)
19720 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19722 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19724 emit_move_insn (iter, const0_rtx);
19726 emit_label (top_label);
19728 tmp = convert_modes (Pmode, iter_mode, iter, true);
19729 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19730 destmem = change_address (destmem, mode, x_addr);
19734 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19735 srcmem = change_address (srcmem, mode, y_addr);
/* When unrolling for chips that reorder memory reads and writes,
   we can save registers by using a single temporary.
   Also, using 4 temporaries is overkill in 32bit mode.  */
19740 if (!TARGET_64BIT && 0)
19742 for (i = 0; i < unroll; i++)
19747 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19749 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19751 emit_move_insn (destmem, srcmem);
19757 gcc_assert (unroll <= 4);
19758 for (i = 0; i < unroll; i++)
19760 tmpreg[i] = gen_reg_rtx (mode);
19764 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19766 emit_move_insn (tmpreg[i], srcmem);
19768 for (i = 0; i < unroll; i++)
19773 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19775 emit_move_insn (destmem, tmpreg[i]);
19780 for (i = 0; i < unroll; i++)
19784 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19785 emit_move_insn (destmem, value);
19788 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19789 true, OPTAB_LIB_WIDEN);
19791 emit_move_insn (iter, tmp);
19793 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19795 if (expected_size != -1)
19797 expected_size /= GET_MODE_SIZE (mode) * unroll;
19798 if (expected_size == 0)
19800 else if (expected_size > REG_BR_PROB_BASE)
19801 predict_jump (REG_BR_PROB_BASE - 1);
19803 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19806 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19807 iter = ix86_zero_extend_to_Pmode (iter);
19808 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19809 true, OPTAB_LIB_WIDEN);
19810 if (tmp != destptr)
19811 emit_move_insn (destptr, tmp);
19814 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19815 true, OPTAB_LIB_WIDEN);
19817 emit_move_insn (srcptr, tmp);
19819 emit_label (out_label);
/* Output "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
19825 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19826 rtx destptr, rtx srcptr,
19828 enum machine_mode mode)
19833 HOST_WIDE_INT rounded_count;
19835 /* If the size is known, it is shorter to use rep movs. */
19836 if (mode == QImode && CONST_INT_P (count)
19837 && !(INTVAL (count) & 3))
19840 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19841 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19842 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19843 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19844 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19845 if (mode != QImode)
19847 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19848 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19849 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19850 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19851 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19852 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19856 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19857 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19859 if (CONST_INT_P (count))
19861 rounded_count = (INTVAL (count)
19862 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19863 destmem = shallow_copy_rtx (destmem);
19864 srcmem = shallow_copy_rtx (srcmem);
19865 set_mem_size (destmem, rounded_count);
19866 set_mem_size (srcmem, rounded_count);
19870 if (MEM_SIZE_KNOWN_P (destmem))
19871 clear_mem_size (destmem);
19872 if (MEM_SIZE_KNOWN_P (srcmem))
19873 clear_mem_size (srcmem);
19875 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
/* Output "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
19882 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19883 rtx count, enum machine_mode mode,
19888 HOST_WIDE_INT rounded_count;
19890 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19891 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19892 value = force_reg (mode, gen_lowpart (mode, value));
19893 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19894 if (mode != QImode)
19896 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19897 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19898 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19901 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19902 if (orig_value == const0_rtx && CONST_INT_P (count))
19904 rounded_count = (INTVAL (count)
19905 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19906 destmem = shallow_copy_rtx (destmem);
19907 set_mem_size (destmem, rounded_count);
19909 else if (MEM_SIZE_KNOWN_P (destmem))
19910 clear_mem_size (destmem);
19911 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19915 emit_strmov (rtx destmem, rtx srcmem,
19916 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19918 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19919 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19920 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19923 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
19925 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19926 rtx destptr, rtx srcptr, rtx count, int max_size)
19929 if (CONST_INT_P (count))
19931 HOST_WIDE_INT countval = INTVAL (count);
19934 if ((countval & 0x10) && max_size > 16)
19938 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19939 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19942 gcc_unreachable ();
19945 if ((countval & 0x08) && max_size > 8)
19948 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19951 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19952 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19956 if ((countval & 0x04) && max_size > 4)
19958 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19961 if ((countval & 0x02) && max_size > 2)
19963 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19966 if ((countval & 0x01) && max_size > 1)
19968 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19975 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19976 count, 1, OPTAB_DIRECT);
19977 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19978 count, QImode, 1, 4);
19982 /* When there are stringops, we can cheaply increase dest and src pointers.
19983    Otherwise we save code size by maintaining an offset (zero is readily
19984    available from the preceding rep operation) and using x86 addressing modes.  */
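   /* For example, a 7-byte tail is handled as one 4-byte, one 2-byte and
      one 1-byte move, each guarded by a test on the corresponding bit of
      COUNT.  */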
19986 if (TARGET_SINGLE_STRINGOP)
19990 rtx label = ix86_expand_aligntest (count, 4, true);
19991 src = change_address (srcmem, SImode, srcptr);
19992 dest = change_address (destmem, SImode, destptr);
19993 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19994 emit_label (label);
19995 LABEL_NUSES (label) = 1;
19999 rtx label = ix86_expand_aligntest (count, 2, true);
20000 src = change_address (srcmem, HImode, srcptr);
20001 dest = change_address (destmem, HImode, destptr);
20002 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20003 emit_label (label);
20004 LABEL_NUSES (label) = 1;
20008 rtx label = ix86_expand_aligntest (count, 1, true);
20009 src = change_address (srcmem, QImode, srcptr);
20010 dest = change_address (destmem, QImode, destptr);
20011 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20012 emit_label (label);
20013 LABEL_NUSES (label) = 1;
20018 rtx offset = force_reg (Pmode, const0_rtx);
20023 rtx label = ix86_expand_aligntest (count, 4, true);
20024 src = change_address (srcmem, SImode, srcptr);
20025 dest = change_address (destmem, SImode, destptr);
20026 emit_move_insn (dest, src);
20027 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20028 true, OPTAB_LIB_WIDEN);
20030 emit_move_insn (offset, tmp);
20031 emit_label (label);
20032 LABEL_NUSES (label) = 1;
20036 rtx label = ix86_expand_aligntest (count, 2, true);
20037 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20038 src = change_address (srcmem, HImode, tmp);
20039 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20040 dest = change_address (destmem, HImode, tmp);
20041 emit_move_insn (dest, src);
20042 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20043 true, OPTAB_LIB_WIDEN);
20045 emit_move_insn (offset, tmp);
20046 emit_label (label);
20047 LABEL_NUSES (label) = 1;
20051 rtx label = ix86_expand_aligntest (count, 1, true);
20052 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20053 src = change_address (srcmem, QImode, tmp);
20054 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20055 dest = change_address (destmem, QImode, tmp);
20056 emit_move_insn (dest, src);
20057 emit_label (label);
20058 LABEL_NUSES (label) = 1;
20063 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
20065 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20066 rtx count, int max_size)
20069 expand_simple_binop (counter_mode (count), AND, count,
20070 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20071 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20072 gen_lowpart (QImode, value), count, QImode,
20076 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
20078 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20082 if (CONST_INT_P (count))
20084 HOST_WIDE_INT countval = INTVAL (count);
20087 if ((countval & 0x10) && max_size > 16)
20091 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20092 emit_insn (gen_strset (destptr, dest, value));
20093 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20094 emit_insn (gen_strset (destptr, dest, value));
20097 gcc_unreachable ();
20100 if ((countval & 0x08) && max_size > 8)
20104 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20105 emit_insn (gen_strset (destptr, dest, value));
20109 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20110 emit_insn (gen_strset (destptr, dest, value));
20111 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20112 emit_insn (gen_strset (destptr, dest, value));
20116 if ((countval & 0x04) && max_size > 4)
20118 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20119 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20122 if ((countval & 0x02) && max_size > 2)
20124 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20125 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20128 if ((countval & 0x01) && max_size > 1)
20130 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20131 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20138 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20143 rtx label = ix86_expand_aligntest (count, 16, true);
20146 dest = change_address (destmem, DImode, destptr);
20147 emit_insn (gen_strset (destptr, dest, value));
20148 emit_insn (gen_strset (destptr, dest, value));
20152 dest = change_address (destmem, SImode, destptr);
20153 emit_insn (gen_strset (destptr, dest, value));
20154 emit_insn (gen_strset (destptr, dest, value));
20155 emit_insn (gen_strset (destptr, dest, value));
20156 emit_insn (gen_strset (destptr, dest, value));
20158 emit_label (label);
20159 LABEL_NUSES (label) = 1;
20163 rtx label = ix86_expand_aligntest (count, 8, true);
20166 dest = change_address (destmem, DImode, destptr);
20167 emit_insn (gen_strset (destptr, dest, value));
20171 dest = change_address (destmem, SImode, destptr);
20172 emit_insn (gen_strset (destptr, dest, value));
20173 emit_insn (gen_strset (destptr, dest, value));
20175 emit_label (label);
20176 LABEL_NUSES (label) = 1;
20180 rtx label = ix86_expand_aligntest (count, 4, true);
20181 dest = change_address (destmem, SImode, destptr);
20182 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20183 emit_label (label);
20184 LABEL_NUSES (label) = 1;
20188 rtx label = ix86_expand_aligntest (count, 2, true);
20189 dest = change_address (destmem, HImode, destptr);
20190 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20191 emit_label (label);
20192 LABEL_NUSES (label) = 1;
20196 rtx label = ix86_expand_aligntest (count, 1, true);
20197 dest = change_address (destmem, QImode, destptr);
20198 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20199 emit_label (label);
20200 LABEL_NUSES (label) = 1;
20204 /* Copy enough from SRC to DEST to align DEST, known to be aligned to ALIGN,
20205    to DESIRED_ALIGNMENT.  */
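/* Each step below conditionally copies one 1-, 2- or 4-byte chunk and bumps
   the pointers, so at most DESIRED_ALIGNMENT - 1 bytes are copied in total;
   COUNT is adjusted on every path that actually copies.  */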
20207 expand_movmem_prologue (rtx destmem, rtx srcmem,
20208 rtx destptr, rtx srcptr, rtx count,
20209 int align, int desired_alignment)
20211 if (align <= 1 && desired_alignment > 1)
20213 rtx label = ix86_expand_aligntest (destptr, 1, false);
20214 srcmem = change_address (srcmem, QImode, srcptr);
20215 destmem = change_address (destmem, QImode, destptr);
20216 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20217 ix86_adjust_counter (count, 1);
20218 emit_label (label);
20219 LABEL_NUSES (label) = 1;
20221 if (align <= 2 && desired_alignment > 2)
20223 rtx label = ix86_expand_aligntest (destptr, 2, false);
20224 srcmem = change_address (srcmem, HImode, srcptr);
20225 destmem = change_address (destmem, HImode, destptr);
20226 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20227 ix86_adjust_counter (count, 2);
20228 emit_label (label);
20229 LABEL_NUSES (label) = 1;
20231 if (align <= 4 && desired_alignment > 4)
20233 rtx label = ix86_expand_aligntest (destptr, 4, false);
20234 srcmem = change_address (srcmem, SImode, srcptr);
20235 destmem = change_address (destmem, SImode, destptr);
20236 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20237 ix86_adjust_counter (count, 4);
20238 emit_label (label);
20239 LABEL_NUSES (label) = 1;
20241 gcc_assert (desired_alignment <= 8);
20244 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
20245    ALIGN_BYTES is how many bytes need to be copied.  */
20247 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20248 int desired_align, int align_bytes)
20251 rtx orig_dst = dst;
20252 rtx orig_src = src;
20254 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20255 if (src_align_bytes >= 0)
20256 src_align_bytes = desired_align - src_align_bytes;
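  /* SRC_ALIGN_BYTES is now how many bytes remain before SRC becomes
     DESIRED_ALIGN-aligned, or negative when the source alignment is
     unknown; the code below uses it to raise MEM_ALIGN on SRC whenever
     the source and destination share the same misalignment.  */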
20257 if (align_bytes & 1)
20259 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20260 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20262 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20264 if (align_bytes & 2)
20266 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20267 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20268 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20269 set_mem_align (dst, 2 * BITS_PER_UNIT);
20270 if (src_align_bytes >= 0
20271 && (src_align_bytes & 1) == (align_bytes & 1)
20272 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20273 set_mem_align (src, 2 * BITS_PER_UNIT);
20275 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20277 if (align_bytes & 4)
20279 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20280 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20281 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20282 set_mem_align (dst, 4 * BITS_PER_UNIT);
20283 if (src_align_bytes >= 0)
20285 unsigned int src_align = 0;
20286 if ((src_align_bytes & 3) == (align_bytes & 3))
20288 else if ((src_align_bytes & 1) == (align_bytes & 1))
20290 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20291 set_mem_align (src, src_align * BITS_PER_UNIT);
20294 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20296 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20297 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20298 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20299 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20300 if (src_align_bytes >= 0)
20302 unsigned int src_align = 0;
20303 if ((src_align_bytes & 7) == (align_bytes & 7))
20305 else if ((src_align_bytes & 3) == (align_bytes & 3))
20307 else if ((src_align_bytes & 1) == (align_bytes & 1))
20309 if (src_align > (unsigned int) desired_align)
20310 src_align = desired_align;
20311 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20312 set_mem_align (src, src_align * BITS_PER_UNIT);
20314 if (MEM_SIZE_KNOWN_P (orig_dst))
20315 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20316 if (MEM_SIZE_KNOWN_P (orig_src))
20317 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
20322 /* Store enough into DEST to align DEST, known to be aligned to ALIGN,
20323    to DESIRED_ALIGNMENT.  */
20325 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20326 int align, int desired_alignment)
20328 if (align <= 1 && desired_alignment > 1)
20330 rtx label = ix86_expand_aligntest (destptr, 1, false);
20331 destmem = change_address (destmem, QImode, destptr);
20332 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20333 ix86_adjust_counter (count, 1);
20334 emit_label (label);
20335 LABEL_NUSES (label) = 1;
20337 if (align <= 2 && desired_alignment > 2)
20339 rtx label = ix86_expand_aligntest (destptr, 2, false);
20340 destmem = change_address (destmem, HImode, destptr);
20341 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20342 ix86_adjust_counter (count, 2);
20343 emit_label (label);
20344 LABEL_NUSES (label) = 1;
20346 if (align <= 4 && desired_alignment > 4)
20348 rtx label = ix86_expand_aligntest (destptr, 4, false);
20349 destmem = change_address (destmem, SImode, destptr);
20350 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20351 ix86_adjust_counter (count, 4);
20352 emit_label (label);
20353 LABEL_NUSES (label) = 1;
20355 gcc_assert (desired_alignment <= 8);
20358 /* Store enough into DST to align DST to DESIRED_ALIGN.
20359    ALIGN_BYTES is how many bytes need to be stored.  */
20361 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20362 int desired_align, int align_bytes)
20365 rtx orig_dst = dst;
20366 if (align_bytes & 1)
20368 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20370 emit_insn (gen_strset (destreg, dst,
20371 gen_lowpart (QImode, value)));
20373 if (align_bytes & 2)
20375 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20376 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20377 set_mem_align (dst, 2 * BITS_PER_UNIT);
20379 emit_insn (gen_strset (destreg, dst,
20380 gen_lowpart (HImode, value)));
20382 if (align_bytes & 4)
20384 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20385 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20386 set_mem_align (dst, 4 * BITS_PER_UNIT);
20388 emit_insn (gen_strset (destreg, dst,
20389 gen_lowpart (SImode, value)));
20391 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20392 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20393 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20394 if (MEM_SIZE_KNOWN_P (orig_dst))
20395 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20399 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20400 static enum stringop_alg
20401 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20402 int *dynamic_check)
20404 const struct stringop_algs * algs;
20405 bool optimize_for_speed;
20406 /* Algorithms using the rep prefix want at least edi and ecx;
20407 additionally, memset wants eax and memcpy wants esi. Don't
20408 consider such algorithms if the user has appropriated those
20409 registers for their own purposes. */
20410 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20412 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20414 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20415 || (alg != rep_prefix_1_byte \
20416 && alg != rep_prefix_4_byte \
20417 && alg != rep_prefix_8_byte))
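  /* I.e. the rep-prefix algorithms are usable only when the registers they
     implicitly need are available; every other algorithm is always
     usable.  */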
20418 const struct processor_costs *cost;
20420 /* Even if the string operation call is cold, we still might spend a lot
20421 of time processing large blocks. */
20422 if (optimize_function_for_size_p (cfun)
20423 || (optimize_insn_for_size_p ()
20424 && expected_size != -1 && expected_size < 256))
20425 optimize_for_speed = false;
20427 optimize_for_speed = true;
20429 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20431 *dynamic_check = -1;
20433 algs = &cost->memset[TARGET_64BIT != 0];
20435 algs = &cost->memcpy[TARGET_64BIT != 0];
20436 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
20437 return ix86_stringop_alg;
20438 /* rep; movq or rep; movl is the smallest variant. */
20439 else if (!optimize_for_speed)
20441 if (!count || (count & 3))
20442 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20444 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20446 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.  */
20448 else if (expected_size != -1 && expected_size < 4)
20449 return loop_1_byte;
20450 else if (expected_size != -1)
20453 enum stringop_alg alg = libcall;
20454 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20456 /* We get here if the algorithms that were not libcall-based
20457 were rep-prefix based and we are unable to use rep prefixes
20458 based on global register usage. Break out of the loop and
20459 use the heuristic below. */
20460 if (algs->size[i].max == 0)
20462 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20464 enum stringop_alg candidate = algs->size[i].alg;
20466 if (candidate != libcall && ALG_USABLE_P (candidate))
20468 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20469 last non-libcall inline algorithm. */
20470 if (TARGET_INLINE_ALL_STRINGOPS)
20472 /* When the current size is best copied by a libcall, but we
20473    are still forced to inline, run the heuristic below
20474    that will pick code for medium-sized blocks.  */
20475 if (alg != libcall)
20479 else if (ALG_USABLE_P (candidate))
20483 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20485 /* When asked to inline the call anyway, try to pick a meaningful choice.
20486    We look for the maximal size of block that is faster to copy by hand and
20487    take blocks of at most that size, guessing that the average size will
20488    be roughly half of the maximum.
20490    If this turns out to be bad, we might simply specify the preferred
20491    choice in ix86_costs.  */
20492 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20493 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20496 enum stringop_alg alg;
20498 bool any_alg_usable_p = true;
20500 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20502 enum stringop_alg candidate = algs->size[i].alg;
20503 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20505 if (candidate != libcall && candidate
20506 && ALG_USABLE_P (candidate))
20507 max = algs->size[i].max;
20509 /* If there aren't any usable algorithms, then recursing on
20510 smaller sizes isn't going to find anything. Just return the
20511 simple byte-at-a-time copy loop. */
20512 if (!any_alg_usable_p)
20514 /* Pick something reasonable. */
20515 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20516 *dynamic_check = 128;
20517 return loop_1_byte;
20521 alg = decide_alg (count, max / 2, memset, dynamic_check);
20522 gcc_assert (*dynamic_check == -1);
20523 gcc_assert (alg != libcall);
20524 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20525 *dynamic_check = max;
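      /* Returning MAX in *DYNAMIC_CHECK makes the expander emit a run-time
	 size test that diverts blocks of MAX bytes or more to a library
	 call.  */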
20528 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20529 #undef ALG_USABLE_P
20532 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20533 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20535 decide_alignment (int align,
20536 enum stringop_alg alg,
20539 int desired_align = 0;
20543 gcc_unreachable ();
20545 case unrolled_loop:
20546 desired_align = GET_MODE_SIZE (Pmode);
20548 case rep_prefix_8_byte:
20551 case rep_prefix_4_byte:
20552 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
20553    copying a whole cache line at once.  */
20554 if (TARGET_PENTIUMPRO)
20559 case rep_prefix_1_byte:
20560 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
20561    copying a whole cache line at once.  */
20562 if (TARGET_PENTIUMPRO)
20576 if (desired_align < align)
20577 desired_align = align;
20578 if (expected_size != -1 && expected_size < 4)
20579 desired_align = align;
20580 return desired_align;
20583 /* Return the smallest power of 2 greater than VAL. */
20585 smallest_pow2_greater_than (int val)
20593 /* Expand string move (memcpy) operation. Use i386 string operations
20594 when profitable. expand_setmem contains similar code. The code
20595 depends upon architecture, block size and alignment, but always has
20596 the same overall structure:
20598    1) Prologue guard: Conditional that jumps up to epilogues for small
20599       blocks that can be handled by the epilogue alone.  This is faster
20600       but also needed for correctness, since the prologue assumes the block
20601       is larger than the desired alignment.
20603       An optional dynamic check for size and a libcall for large
20604       blocks are emitted here too, with -minline-stringops-dynamically.
20606 2) Prologue: copy first few bytes in order to get destination
20607 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
20608 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
20609 copied. We emit either a jump tree on power of two sized
20610 blocks, or a byte loop.
20612 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20613 with specified algorithm.
20615 4) Epilogue: code copying tail of the block that is too small to be
20616 handled by main body (or up to size guarded by prologue guard). */
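/* As an illustration (not a guarantee of the exact code emitted): a memcpy
   of 100 bytes with unknown alignment and an 8-byte chunked main loop would
   branch to the epilogue for counts below 8, copy up to 7 bytes in the
   prologue to align the destination, run the main loop, and finish the
   remaining tail in the epilogue.  */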
20619 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20620 rtx expected_align_exp, rtx expected_size_exp)
20626 rtx jump_around_label = NULL;
20627 HOST_WIDE_INT align = 1;
20628 unsigned HOST_WIDE_INT count = 0;
20629 HOST_WIDE_INT expected_size = -1;
20630 int size_needed = 0, epilogue_size_needed;
20631 int desired_align = 0, align_bytes = 0;
20632 enum stringop_alg alg;
20634 bool need_zero_guard = false;
20636 if (CONST_INT_P (align_exp))
20637 align = INTVAL (align_exp);
20638 /* i386 can do misaligned access at a reasonably increased cost.  */
20639 if (CONST_INT_P (expected_align_exp)
20640 && INTVAL (expected_align_exp) > align)
20641 align = INTVAL (expected_align_exp);
20642 /* ALIGN is the minimum of destination and source alignment, but we care here
20643 just about destination alignment. */
20644 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20645 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20647 if (CONST_INT_P (count_exp))
20648 count = expected_size = INTVAL (count_exp);
20649 if (CONST_INT_P (expected_size_exp) && count == 0)
20650 expected_size = INTVAL (expected_size_exp);
20652 /* Make sure we don't need to care about overflow later on. */
20653 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20656 /* Step 0: Decide on preferred algorithm, desired alignment and
20657 size of chunks to be copied by main loop. */
20659 alg = decide_alg (count, expected_size, false, &dynamic_check);
20660 desired_align = decide_alignment (align, alg, expected_size);
20662 if (!TARGET_ALIGN_STRINGOPS)
20663 align = desired_align;
20665 if (alg == libcall)
20667 gcc_assert (alg != no_stringop);
20669 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20670 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20671 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20676 gcc_unreachable ();
20678 need_zero_guard = true;
20679 size_needed = GET_MODE_SIZE (Pmode);
20681 case unrolled_loop:
20682 need_zero_guard = true;
20683 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20685 case rep_prefix_8_byte:
20688 case rep_prefix_4_byte:
20691 case rep_prefix_1_byte:
20695 need_zero_guard = true;
20700 epilogue_size_needed = size_needed;
20702 /* Step 1: Prologue guard. */
20704 /* Alignment code needs count to be in register. */
20705 if (CONST_INT_P (count_exp) && desired_align > align)
20707 if (INTVAL (count_exp) > desired_align
20708 && INTVAL (count_exp) > size_needed)
20711 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20712 if (align_bytes <= 0)
20715 align_bytes = desired_align - align_bytes;
20717 if (align_bytes == 0)
20718 count_exp = force_reg (counter_mode (count_exp), count_exp);
20720 gcc_assert (desired_align >= 1 && align >= 1);
20722 /* Ensure that alignment prologue won't copy past end of block. */
20723 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20725 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20726 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20727    Make sure it is a power of 2.  */
20728 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20732 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20734 /* If the main algorithm works on QImode, no epilogue is needed.
20735    For small sizes just don't align anything.  */
20736 if (size_needed == 1)
20737 desired_align = align;
20744 label = gen_label_rtx ();
20745 emit_cmp_and_jump_insns (count_exp,
20746 GEN_INT (epilogue_size_needed),
20747 LTU, 0, counter_mode (count_exp), 1, label);
20748 if (expected_size == -1 || expected_size < epilogue_size_needed)
20749 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20751 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20755 /* Emit code to decide at run time whether a library call or inline code should be used.  */
20757 if (dynamic_check != -1)
20759 if (CONST_INT_P (count_exp))
20761 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20763 emit_block_move_via_libcall (dst, src, count_exp, false);
20764 count_exp = const0_rtx;
20770 rtx hot_label = gen_label_rtx ();
20771 jump_around_label = gen_label_rtx ();
20772 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20773 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20774 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20775 emit_block_move_via_libcall (dst, src, count_exp, false);
20776 emit_jump (jump_around_label);
20777 emit_label (hot_label);
20781 /* Step 2: Alignment prologue. */
20783 if (desired_align > align)
20785 if (align_bytes == 0)
20787 /* Except for the first move in the epilogue, we no longer know
20788    the constant offset in aliasing info.  It doesn't seem worth
20789    the pain to maintain it for the first move, so throw away the info early.  */
20791 src = change_address (src, BLKmode, srcreg);
20792 dst = change_address (dst, BLKmode, destreg);
20793 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20798 /* If we know how many bytes need to be stored before dst is
20799 sufficiently aligned, maintain aliasing info accurately. */
20800 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20801 desired_align, align_bytes);
20802 count_exp = plus_constant (count_exp, -align_bytes);
20803 count -= align_bytes;
20805 if (need_zero_guard
20806 && (count < (unsigned HOST_WIDE_INT) size_needed
20807 || (align_bytes == 0
20808 && count < ((unsigned HOST_WIDE_INT) size_needed
20809 + desired_align - align))))
20811 /* It is possible that we copied enough so the main loop will not execute.  */
20813 gcc_assert (size_needed > 1);
20814 if (label == NULL_RTX)
20815 label = gen_label_rtx ();
20816 emit_cmp_and_jump_insns (count_exp,
20817 GEN_INT (size_needed),
20818 LTU, 0, counter_mode (count_exp), 1, label);
20819 if (expected_size == -1
20820 || expected_size < (desired_align - align) / 2 + size_needed)
20821 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20823 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20826 if (label && size_needed == 1)
20828 emit_label (label);
20829 LABEL_NUSES (label) = 1;
20831 epilogue_size_needed = 1;
20833 else if (label == NULL_RTX)
20834 epilogue_size_needed = size_needed;
20836 /* Step 3: Main loop. */
20842 gcc_unreachable ();
20844 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20845 count_exp, QImode, 1, expected_size);
20848 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20849 count_exp, Pmode, 1, expected_size);
20851 case unrolled_loop:
20852 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
20853 registers for 4 temporaries anyway. */
20854 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20855 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20858 case rep_prefix_8_byte:
20859 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20862 case rep_prefix_4_byte:
20863 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20866 case rep_prefix_1_byte:
20867 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20871 /* Properly adjust the offsets of the src and dest memory for aliasing.  */
20872 if (CONST_INT_P (count_exp))
20874 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20875 (count / size_needed) * size_needed);
20876 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20877 (count / size_needed) * size_needed);
20881 src = change_address (src, BLKmode, srcreg);
20882 dst = change_address (dst, BLKmode, destreg);
20885 /* Step 4: Epilogue to copy the remaining bytes. */
20889 /* When the main loop is done, COUNT_EXP might hold the original count,
20890    while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20891    Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20892    bytes.  Compensate if needed.  */
20894 if (size_needed < epilogue_size_needed)
20897 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20898 GEN_INT (size_needed - 1), count_exp, 1,
20900 if (tmp != count_exp)
20901 emit_move_insn (count_exp, tmp);
20903 emit_label (label);
20904 LABEL_NUSES (label) = 1;
20907 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20908 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20909 epilogue_size_needed);
20910 if (jump_around_label)
20911 emit_label (jump_around_label);
20915 /* Helper function for memset; for the QImode value 0xXY produce
20916    0xXYXYXYXY of the width specified by MODE.  This is essentially
20917    a * 0x01010101, but we can do slightly better than
20918    synth_mult by unwinding the sequence by hand on CPUs with slow multiply.  */
20921 promote_duplicated_reg (enum machine_mode mode, rtx val)
20923 enum machine_mode valmode = GET_MODE (val);
20925 int nops = mode == DImode ? 3 : 2;
20927 gcc_assert (mode == SImode || mode == DImode);
20928 if (val == const0_rtx)
20929 return copy_to_mode_reg (mode, const0_rtx);
20930 if (CONST_INT_P (val))
20932 HOST_WIDE_INT v = INTVAL (val) & 255;
20936 if (mode == DImode)
20937 v |= (v << 16) << 16;
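      /* The 32-bit shift is split into two 16-bit steps, presumably so the
	 code stays well defined even when HOST_WIDE_INT is only 32 bits
	 wide.  */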
20938 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20941 if (valmode == VOIDmode)
20943 if (valmode != QImode)
20944 val = gen_lowpart (QImode, val);
20945 if (mode == QImode)
20947 if (!TARGET_PARTIAL_REG_STALL)
20949 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20950 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20951 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20952 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20954 rtx reg = convert_modes (mode, QImode, val, true);
20955 tmp = promote_duplicated_reg (mode, const1_rtx);
20956 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20961 rtx reg = convert_modes (mode, QImode, val, true);
20963 if (!TARGET_PARTIAL_REG_STALL)
20964 if (mode == SImode)
20965 emit_insn (gen_movsi_insv_1 (reg, reg));
20967 emit_insn (gen_movdi_insv_1 (reg, reg));
20970 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20971 NULL, 1, OPTAB_DIRECT);
20973 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20975 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20976 NULL, 1, OPTAB_DIRECT);
20977 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
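  /* Each shift-and-IOR step doubles the number of copies of the byte:
     0x000000XY -> 0x0000XYXY -> 0xXYXYXYXY, with one more 32-bit step
     below for DImode.  */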
20978 if (mode == SImode)
20980 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20981 NULL, 1, OPTAB_DIRECT);
20982 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20987 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
20988    will be needed by the main loop copying SIZE_NEEDED chunks and by the
20989    prologue getting alignment from ALIGN to DESIRED_ALIGN.  */
20991 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20996 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20997 promoted_val = promote_duplicated_reg (DImode, val);
20998 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20999 promoted_val = promote_duplicated_reg (SImode, val);
21000 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21001 promoted_val = promote_duplicated_reg (HImode, val);
21003 promoted_val = val;
21005 return promoted_val;
21008 /* Expand a string set operation (memset).  Use i386 string operations when
21009    profitable.  See the expand_movmem comment for an explanation of the
21010    individual steps performed.  */
21012 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21013 rtx expected_align_exp, rtx expected_size_exp)
21018 rtx jump_around_label = NULL;
21019 HOST_WIDE_INT align = 1;
21020 unsigned HOST_WIDE_INT count = 0;
21021 HOST_WIDE_INT expected_size = -1;
21022 int size_needed = 0, epilogue_size_needed;
21023 int desired_align = 0, align_bytes = 0;
21024 enum stringop_alg alg;
21025 rtx promoted_val = NULL;
21026 bool force_loopy_epilogue = false;
21028 bool need_zero_guard = false;
21030 if (CONST_INT_P (align_exp))
21031 align = INTVAL (align_exp);
21032 /* i386 can do misaligned access at a reasonably increased cost.  */
21033 if (CONST_INT_P (expected_align_exp)
21034 && INTVAL (expected_align_exp) > align)
21035 align = INTVAL (expected_align_exp);
21036 if (CONST_INT_P (count_exp))
21037 count = expected_size = INTVAL (count_exp);
21038 if (CONST_INT_P (expected_size_exp) && count == 0)
21039 expected_size = INTVAL (expected_size_exp);
21041 /* Make sure we don't need to care about overflow later on. */
21042 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21045 /* Step 0: Decide on preferred algorithm, desired alignment and
21046 size of chunks to be copied by main loop. */
21048 alg = decide_alg (count, expected_size, true, &dynamic_check);
21049 desired_align = decide_alignment (align, alg, expected_size);
21051 if (!TARGET_ALIGN_STRINGOPS)
21052 align = desired_align;
21054 if (alg == libcall)
21056 gcc_assert (alg != no_stringop);
21058 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21059 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21064 gcc_unreachable ();
21066 need_zero_guard = true;
21067 size_needed = GET_MODE_SIZE (Pmode);
21069 case unrolled_loop:
21070 need_zero_guard = true;
21071 size_needed = GET_MODE_SIZE (Pmode) * 4;
21073 case rep_prefix_8_byte:
21076 case rep_prefix_4_byte:
21079 case rep_prefix_1_byte:
21083 need_zero_guard = true;
21087 epilogue_size_needed = size_needed;
21089 /* Step 1: Prologue guard. */
21091 /* Alignment code needs count to be in register. */
21092 if (CONST_INT_P (count_exp) && desired_align > align)
21094 if (INTVAL (count_exp) > desired_align
21095 && INTVAL (count_exp) > size_needed)
21098 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21099 if (align_bytes <= 0)
21102 align_bytes = desired_align - align_bytes;
21104 if (align_bytes == 0)
21106 enum machine_mode mode = SImode;
21107 if (TARGET_64BIT && (count & ~0xffffffff))
21109 count_exp = force_reg (mode, count_exp);
21112 /* Do the cheap promotion to allow better CSE across the
21113    main loop and epilogue (i.e. one load of the big constant in
21114    front of all the code).  */
21115 if (CONST_INT_P (val_exp))
21116 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21117 desired_align, align);
21118 /* Ensure that alignment prologue won't copy past end of block. */
21119 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21121 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21122 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21123 Make sure it is power of 2. */
21124 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21126 /* To improve performance of small blocks, we jump around the VAL
21127    promoting code.  This means that if the promoted VAL is not constant,
21128    we might not use it in the epilogue and have to use a byte loop instead.  */
21130 if (epilogue_size_needed > 2 && !promoted_val)
21131 force_loopy_epilogue = true;
21134 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21136 /* If the main algorithm works on QImode, no epilogue is needed.
21137    For small sizes just don't align anything.  */
21138 if (size_needed == 1)
21139 desired_align = align;
21146 label = gen_label_rtx ();
21147 emit_cmp_and_jump_insns (count_exp,
21148 GEN_INT (epilogue_size_needed),
21149 LTU, 0, counter_mode (count_exp), 1, label);
21150 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21151 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21153 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21156 if (dynamic_check != -1)
21158 rtx hot_label = gen_label_rtx ();
21159 jump_around_label = gen_label_rtx ();
21160 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21161 LEU, 0, counter_mode (count_exp), 1, hot_label);
21162 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21163 set_storage_via_libcall (dst, count_exp, val_exp, false);
21164 emit_jump (jump_around_label);
21165 emit_label (hot_label);
21168 /* Step 2: Alignment prologue. */
21170 /* Do the expensive promotion once we have branched off the small blocks.  */
21172 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21173 desired_align, align);
21174 gcc_assert (desired_align >= 1 && align >= 1);
21176 if (desired_align > align)
21178 if (align_bytes == 0)
21180 /* Except for the first move in the epilogue, we no longer know
21181    the constant offset in aliasing info.  It doesn't seem worth
21182    the pain to maintain it for the first move, so throw away the info early.  */
21184 dst = change_address (dst, BLKmode, destreg);
21185 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21190 /* If we know how many bytes need to be stored before dst is
21191 sufficiently aligned, maintain aliasing info accurately. */
21192 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21193 desired_align, align_bytes);
21194 count_exp = plus_constant (count_exp, -align_bytes);
21195 count -= align_bytes;
21197 if (need_zero_guard
21198 && (count < (unsigned HOST_WIDE_INT) size_needed
21199 || (align_bytes == 0
21200 && count < ((unsigned HOST_WIDE_INT) size_needed
21201 + desired_align - align))))
21203 /* It is possible that we copied enough so the main loop will not execute.  */
21205 gcc_assert (size_needed > 1);
21206 if (label == NULL_RTX)
21207 label = gen_label_rtx ();
21208 emit_cmp_and_jump_insns (count_exp,
21209 GEN_INT (size_needed),
21210 LTU, 0, counter_mode (count_exp), 1, label);
21211 if (expected_size == -1
21212 || expected_size < (desired_align - align) / 2 + size_needed)
21213 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21215 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21218 if (label && size_needed == 1)
21220 emit_label (label);
21221 LABEL_NUSES (label) = 1;
21223 promoted_val = val_exp;
21224 epilogue_size_needed = 1;
21226 else if (label == NULL_RTX)
21227 epilogue_size_needed = size_needed;
21229 /* Step 3: Main loop. */
21235 gcc_unreachable ();
21237 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21238 count_exp, QImode, 1, expected_size);
21241 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21242 count_exp, Pmode, 1, expected_size);
21244 case unrolled_loop:
21245 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21246 count_exp, Pmode, 4, expected_size);
21248 case rep_prefix_8_byte:
21249 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21252 case rep_prefix_4_byte:
21253 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21256 case rep_prefix_1_byte:
21257 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21261 /* Properly adjust the offset of the dest memory for aliasing.  */
21262 if (CONST_INT_P (count_exp))
21263 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21264 (count / size_needed) * size_needed);
21266 dst = change_address (dst, BLKmode, destreg);
21268 /* Step 4: Epilogue to copy the remaining bytes. */
21272 /* When the main loop is done, COUNT_EXP might hold the original count,
21273    while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21274    Epilogue code will actually store COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21275    bytes.  Compensate if needed.  */
21277 if (size_needed < epilogue_size_needed)
21280 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21281 GEN_INT (size_needed - 1), count_exp, 1,
21283 if (tmp != count_exp)
21284 emit_move_insn (count_exp, tmp);
21286 emit_label (label);
21287 LABEL_NUSES (label) = 1;
21290 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21292 if (force_loopy_epilogue)
21293 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21294 epilogue_size_needed);
21296 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21297 epilogue_size_needed);
21299 if (jump_around_label)
21300 emit_label (jump_around_label);
21304 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb.
21307 out = result, initialized with the start address
21308 align_rtx = alignment of the address.
21309    scratch = scratch register, initialized with the start address when
21310    not aligned, otherwise undefined
21312 This is just the body. It needs the initializations mentioned above and
21313 some address computing at the end. These things are done in i386.md. */
21316 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21320 rtx align_2_label = NULL_RTX;
21321 rtx align_3_label = NULL_RTX;
21322 rtx align_4_label = gen_label_rtx ();
21323 rtx end_0_label = gen_label_rtx ();
21325 rtx tmpreg = gen_reg_rtx (SImode);
21326 rtx scratch = gen_reg_rtx (SImode);
21330 if (CONST_INT_P (align_rtx))
21331 align = INTVAL (align_rtx);
21333 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21335 /* Is there a known alignment and is it less than 4? */
21338 rtx scratch1 = gen_reg_rtx (Pmode);
21339 emit_move_insn (scratch1, out);
21340 /* Is there a known alignment and is it not 2? */
21343 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21344 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21346 /* Leave just the 3 lower bits. */
21347 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21348 NULL_RTX, 0, OPTAB_WIDEN);
21350 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21351 Pmode, 1, align_4_label);
21352 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21353 Pmode, 1, align_2_label);
21354 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21355 Pmode, 1, align_3_label);
21359 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21360    check whether it is aligned to 4 bytes.  */
21362 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21363 NULL_RTX, 0, OPTAB_WIDEN);
21365 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21366 Pmode, 1, align_4_label);
21369 mem = change_address (src, QImode, out);
21371 /* Now compare the bytes. */
21373 /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
21374 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21375 QImode, 1, end_0_label);
21377 /* Increment the address. */
21378 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21380 /* Not needed with an alignment of 2 */
21383 emit_label (align_2_label);
21385 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21388 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21390 emit_label (align_3_label);
21393 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21396 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21399 /* Generate a loop to check 4 bytes at a time.  It is not a good idea
21400    to align this loop: that only enlarges the program and does not speed it up.  */
21402 emit_label (align_4_label);
21404 mem = change_address (src, SImode, out);
21405 emit_move_insn (scratch, mem);
21406 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21408 /* This formula yields a nonzero result iff one of the bytes is zero.
21409    This saves three branches inside the loop and many cycles.  */
21411 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21412 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21413 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21414 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21415 gen_int_mode (0x80808080, SImode)));
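  /* This is the classic zero-byte test: tmpreg = (scratch - 0x01010101)
     & ~scratch & 0x80808080 ends up nonzero iff some byte of SCRATCH is
     zero.  */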
21416 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21421 rtx reg = gen_reg_rtx (SImode);
21422 rtx reg2 = gen_reg_rtx (Pmode);
21423 emit_move_insn (reg, tmpreg);
21424 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21426 /* If zero is not in the first two bytes, move two bytes forward. */
21427 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21428 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21429 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21430 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21431 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21434 /* Emit lea manually to avoid clobbering of flags. */
21435 emit_insn (gen_rtx_SET (SImode, reg2,
21436 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21438 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21439 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21440 emit_insn (gen_rtx_SET (VOIDmode, out,
21441 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21447 rtx end_2_label = gen_label_rtx ();
21448 /* Is zero in the first two bytes? */
21450 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21451 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21452 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21453 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21454 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21456 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21457 JUMP_LABEL (tmp) = end_2_label;
21459 /* Not in the first two. Move two bytes forward. */
21460 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21461 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21463 emit_label (end_2_label);
21467 /* Avoid a branch in fixing up the byte position.  */
21468 tmpreg = gen_lowpart (QImode, tmpreg);
21469 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21470 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21471 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21472 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
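  /* Doubling TMPREG moves its bit 7 into the carry flag, so the
     subtract-with-borrow of 3 lands OUT exactly on the zero byte without
     a conditional branch.  */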
21474 emit_label (end_0_label);
21477 /* Expand strlen. */
21480 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21482 rtx addr, scratch1, scratch2, scratch3, scratch4;
21484 /* The generic case of the strlen expander is long.  Avoid expanding it
21485    unless TARGET_INLINE_ALL_STRINGOPS.  */
21487 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21488 && !TARGET_INLINE_ALL_STRINGOPS
21489 && !optimize_insn_for_size_p ()
21490 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21493 addr = force_reg (Pmode, XEXP (src, 0));
21494 scratch1 = gen_reg_rtx (Pmode);
21496 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21497 && !optimize_insn_for_size_p ())
21499 /* Well, it seems that some optimizer does not combine a call like
21500    foo (strlen (bar), strlen (bar));
21501    when the move and the subtraction are done here.  It does calculate
21502    the length just once when these instructions are done inside
21503    output_strlen_unroll().  But since &bar[strlen(bar)] is
21504    often used, and this uses one fewer register for the lifetime of
21505    output_strlen_unroll(), this is better.  */
21507 emit_move_insn (out, addr);
21509 ix86_expand_strlensi_unroll_1 (out, src, align);
21511 /* strlensi_unroll_1 returns the address of the zero at the end of
21512 the string, like memchr(), so compute the length by subtracting
21513 the start address. */
21514 emit_insn (ix86_gen_sub3 (out, out, addr));
21520 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21521 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21524 scratch2 = gen_reg_rtx (Pmode);
21525 scratch3 = gen_reg_rtx (Pmode);
21526 scratch4 = force_reg (Pmode, constm1_rtx);
21528 emit_move_insn (scratch3, addr);
21529 eoschar = force_reg (QImode, eoschar);
21531 src = replace_equiv_address_nv (src, scratch3);
21533 /* If .md starts supporting :P, this can be done in .md. */
21534 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21535 scratch4), UNSPEC_SCAS);
21536 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21537 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21538 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
21543 /* For a given symbol (function), construct code to compute the address of
21544    its PLT entry in the large x86-64 PIC model.  */
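/* (The computed value is SYMBOL@PLTOFF plus the GOT base, i.e. the absolute
   address of the PLT slot; the large model cannot assume the PLT is within
   rip-relative reach, so a direct call is not possible.)  */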
21546 construct_plt_address (rtx symbol)
21548 rtx tmp = gen_reg_rtx (Pmode);
21549 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21551 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21552 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21554 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21555 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21560 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21562 rtx pop, bool sibcall)
21564 /* We need to represent that SI and DI registers are clobbered by SYSV calls.  */
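  /* (XMM6-XMM15 are call-saved in the MS ABI but call-clobbered in the SysV
     ABI, and likewise SI/DI, so an MS-ABI caller must assume a SysV callee
     clobbers all of them.)  */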
21566 static int clobbered_registers[] = {
21567 XMM6_REG, XMM7_REG, XMM8_REG,
21568 XMM9_REG, XMM10_REG, XMM11_REG,
21569 XMM12_REG, XMM13_REG, XMM14_REG,
21570 XMM15_REG, SI_REG, DI_REG
21572 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
21573 rtx use = NULL, call;
21574 unsigned int vec_len;
21576 if (pop == const0_rtx)
21578 gcc_assert (!TARGET_64BIT || !pop);
21580 if (TARGET_MACHO && !TARGET_64BIT)
21583 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21584 fnaddr = machopic_indirect_call_target (fnaddr);
21589 /* Static functions and indirect calls don't need the pic register. */
21590 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21591 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21592 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21593 use_reg (&use, pic_offset_table_rtx);
21596 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21598 rtx al = gen_rtx_REG (QImode, AX_REG);
21599 emit_move_insn (al, callarg2);
21600 use_reg (&use, al);
21603 if (ix86_cmodel == CM_LARGE_PIC
21605 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21606 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21607 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21609 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21610 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21612 fnaddr = XEXP (fnaddr, 0);
21613 if (GET_MODE (fnaddr) != Pmode)
21614 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
21615 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
21619 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21621 call = gen_rtx_SET (VOIDmode, retval, call);
21622 vec[vec_len++] = call;
21626 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21627 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21628 vec[vec_len++] = pop;
21631 if (TARGET_64BIT_MS_ABI
21632 && (!callarg2 || INTVAL (callarg2) != -2))
21636 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21637 UNSPEC_MS_TO_SYSV_CALL);
21639 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21641 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21643 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
21645 clobbered_registers[i]));
21648 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21649 if (TARGET_VZEROUPPER)
21652 if (cfun->machine->callee_pass_avx256_p)
21654 if (cfun->machine->callee_return_avx256_p)
21655 avx256 = callee_return_pass_avx256;
21657 avx256 = callee_pass_avx256;
21659 else if (cfun->machine->callee_return_avx256_p)
21660 avx256 = callee_return_avx256;
21662 avx256 = call_no_avx256;
21664 if (reload_completed)
21665 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21667 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
21668 gen_rtvec (1, GEN_INT (avx256)),
21669 UNSPEC_CALL_NEEDS_VZEROUPPER);
21673 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
21674 call = emit_call_insn (call);
21676 CALL_INSN_FUNCTION_USAGE (call) = use;
21682 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
21684 rtx pat = PATTERN (insn);
21685 rtvec vec = XVEC (pat, 0);
21686 int len = GET_NUM_ELEM (vec) - 1;
21688 /* Strip off the last entry of the parallel. */
21689 gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
21690 gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
21692 pat = RTVEC_ELT (vec, 0);
21694 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
21696 emit_insn (gen_avx_vzeroupper (vzeroupper));
21697 emit_call_insn (pat);
21700 /* Output the assembly for a call instruction. */
21703 ix86_output_call_insn (rtx insn, rtx call_op)
21705 bool direct_p = constant_call_address_operand (call_op, Pmode);
21706 bool seh_nop_p = false;
21709 if (SIBLING_CALL_P (insn))
21713 /* SEH epilogue detection requires the indirect branch case
21714 to include REX.W. */
21715 else if (TARGET_SEH)
21716 xasm = "rex.W jmp %A0";
21720 output_asm_insn (xasm, &call_op);
21724 /* SEH unwinding can require an extra nop to be emitted in several
21725 circumstances. Determine if we have one of those. */
21730 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21732 /* If we get to another real insn, we don't need the nop. */
21736 /* If we get to the epilogue note, prevent a catch region from
21737    being adjacent to the standard epilogue sequence.  If non-call
21738    exceptions are enabled, we'll have done this during epilogue emission.  */
21739 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21740 && !flag_non_call_exceptions
21741 && !can_throw_internal (insn))
21748 /* If we didn't find a real insn following the call, prevent the
21749 unwinder from looking into the next function. */
21755 xasm = "call\t%P0";
21757 xasm = "call\t%A0";
21759 output_asm_insn (xasm, &call_op);
21767 /* Clear stack slot assignments remembered from previous functions.
21768    This is called from INIT_EXPANDERS once before RTL is emitted for each function.  */
21771 static struct machine_function *
21772 ix86_init_machine_status (void)
21774 struct machine_function *f;
21776 f = ggc_alloc_cleared_machine_function ();
21777 f->use_fast_prologue_epilogue_nregs = -1;
21778 f->tls_descriptor_call_expanded_p = 0;
21779 f->call_abi = ix86_abi;
21784 /* Return a MEM corresponding to a stack slot with mode MODE.
21785 Allocate a new slot if necessary.
21787 The RTL for a function can have several slots available: N is
21788 which slot to use. */
21791 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
21793 struct stack_local_entry *s;
21795 gcc_assert (n < MAX_386_STACK_LOCALS);
21797 /* Virtual slot is valid only before vregs are instantiated. */
21798 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
21800 for (s = ix86_stack_locals; s; s = s->next)
21801 if (s->mode == mode && s->n == n)
21802 return validize_mem (copy_rtx (s->rtl));
21804 s = ggc_alloc_stack_local_entry ();
21807 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21809 s->next = ix86_stack_locals;
21810 ix86_stack_locals = s;
21811 return validize_mem (s->rtl);
21814 /* Calculate the length of the memory address in the instruction encoding.
21815 Includes addr32 prefix, does not include the one-byte modrm, opcode,
21816 or other prefixes. */
21819 memory_address_length (rtx addr)
21821 struct ix86_address parts;
21822 rtx base, index, disp;
21826 if (GET_CODE (addr) == PRE_DEC
21827 || GET_CODE (addr) == POST_INC
21828 || GET_CODE (addr) == PRE_MODIFY
21829 || GET_CODE (addr) == POST_MODIFY)
21832 ok = ix86_decompose_address (addr, &parts);
21835 if (parts.base && GET_CODE (parts.base) == SUBREG)
21836 parts.base = SUBREG_REG (parts.base);
21837 if (parts.index && GET_CODE (parts.index) == SUBREG)
21838 parts.index = SUBREG_REG (parts.index);
21841 index = parts.index;
21844 /* Add length of addr32 prefix. */
21845 len = (GET_CODE (addr) == ZERO_EXTEND
21846 || GET_CODE (addr) == AND);
21849 /* Rule of thumb:  - esp as the base always wants an index,
21850 - ebp as the base always wants a displacement,
21851 - r12 as the base always wants an index,
21852 - r13 as the base always wants a displacement. */
21854 /* Register Indirect. */
21855 if (base && !index && !disp)
21857 /* esp (for its index) and ebp (for its displacement) need
21858    the two-byte modrm form.  Similarly for r12 and r13 in 64-bit mode.  */
21861 && (addr == arg_pointer_rtx
21862 || addr == frame_pointer_rtx
21863 || REGNO (addr) == SP_REG
21864 || REGNO (addr) == BP_REG
21865 || REGNO (addr) == R12_REG
21866 || REGNO (addr) == R13_REG))
21870 /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
21871    is not disp32, but disp32(%rip), so for disp32
21872    a SIB byte is needed, unless print_operand_address
21873    optimizes it into disp32(%rip) or (%rip) is implied by UNSPEC.  */
21875 else if (disp && !base && !index)
21882 if (GET_CODE (disp) == CONST)
21883 symbol = XEXP (disp, 0);
21884 if (GET_CODE (symbol) == PLUS
21885 && CONST_INT_P (XEXP (symbol, 1)))
21886 symbol = XEXP (symbol, 0);
21888 if (GET_CODE (symbol) != LABEL_REF
21889 && (GET_CODE (symbol) != SYMBOL_REF
21890 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21891 && (GET_CODE (symbol) != UNSPEC
21892 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21893 && XINT (symbol, 1) != UNSPEC_PCREL
21894 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21901 /* Find the length of the displacement constant. */
21904 if (base && satisfies_constraint_K (disp))
21909 /* ebp always wants a displacement. Similarly r13. */
21910 else if (base && REG_P (base)
21911 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21914 /* An index requires the two-byte modrm form.... */
21916 /* ...like esp (or r12), which always wants an index. */
21917 || base == arg_pointer_rtx
21918 || base == frame_pointer_rtx
21919 || (base && REG_P (base)
21920 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
21937 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
21938    is set, expect that the insn has an 8-bit immediate alternative.  */
21940 ix86_attr_length_immediate_default (rtx insn, bool shortform)
21944 extract_insn_cached (insn);
21945 for (i = recog_data.n_operands - 1; i >= 0; --i)
21946 if (CONSTANT_P (recog_data.operand[i]))
21948 enum attr_mode mode = get_attr_mode (insn);
21951 if (shortform && CONST_INT_P (recog_data.operand[i]))
21953 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21960 ival = trunc_int_for_mode (ival, HImode);
21963 ival = trunc_int_for_mode (ival, SImode);
21968 if (IN_RANGE (ival, -128, 127))
21985 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
21990 fatal_insn ("unknown insn mode", insn);
21995 /* Compute default value for "length_address" attribute. */
21997 ix86_attr_length_address_default (rtx insn)
22001 if (get_attr_type (insn) == TYPE_LEA)
22003 rtx set = PATTERN (insn), addr;
22005 if (GET_CODE (set) == PARALLEL)
22006 set = XVECEXP (set, 0, 0);
22008 gcc_assert (GET_CODE (set) == SET);
22010 addr = SET_SRC (set);
22011 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22013 if (GET_CODE (addr) == ZERO_EXTEND)
22014 addr = XEXP (addr, 0);
22015 if (GET_CODE (addr) == SUBREG)
22016 addr = SUBREG_REG (addr);
22019 return memory_address_length (addr);
22022 extract_insn_cached (insn);
22023 for (i = recog_data.n_operands - 1; i >= 0; --i)
22024 if (MEM_P (recog_data.operand[i]))
22026 constrain_operands_cached (reload_completed);
22027 if (which_alternative != -1)
22029 const char *constraints = recog_data.constraints[i];
22030 int alt = which_alternative;
22032 while (*constraints == '=' || *constraints == '+')
22035 while (*constraints++ != ',')
22037 /* Skip ignored operands. */
22038 if (*constraints == 'X')
22041 return memory_address_length (XEXP (recog_data.operand[i], 0));
22046 /* Compute default value for "length_vex" attribute. It includes
22047 2 or 3 byte VEX prefix and 1 opcode byte. */
22050 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
22054 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX W
22055 bit requires the 3-byte VEX prefix. */
22056 if (!has_0f_opcode || has_vex_w)
22059 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
22063 extract_insn_cached (insn);
22065 for (i = recog_data.n_operands - 1; i >= 0; --i)
22066 if (REG_P (recog_data.operand[i]))
22068 /* REX.W bit uses 3 byte VEX prefix. */
22069 if (GET_MODE (recog_data.operand[i]) == DImode
22070 && GENERAL_REG_P (recog_data.operand[i]))
22075 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22076 if (MEM_P (recog_data.operand[i])
22077 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
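/* Illustrative sketch, not from the GCC sources: the 2- vs 3-byte VEX
   decision above as a pure predicate.  The boolean parameters are
   hypothetical stand-ins for the operand scan the real function
   performs; the result counts only the prefix, not the opcode byte.  */

static int
example_vex_prefix_len (int has_0f_opcode, int has_vex_w,
                        int has_64bit_gpr_operand,
                        int mem_uses_extended_regs)
{
  /* Only the 0f opcode map fits the 2-byte form, and any of VEX.W,
     REX.W, REX.X or REX.B forces the 3-byte form.  */
  if (!has_0f_opcode || has_vex_w
      || has_64bit_gpr_operand || mem_uses_extended_regs)
    return 3;
  return 2;
}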
22084 /* Return the maximum number of instructions a cpu can issue. */
22087 ix86_issue_rate (void)
22091 case PROCESSOR_PENTIUM:
22092 case PROCESSOR_ATOM:
22096 case PROCESSOR_PENTIUMPRO:
22097 case PROCESSOR_PENTIUM4:
22098 case PROCESSOR_CORE2_32:
22099 case PROCESSOR_CORE2_64:
22100 case PROCESSOR_COREI7_32:
22101 case PROCESSOR_COREI7_64:
22102 case PROCESSOR_ATHLON:
22104 case PROCESSOR_AMDFAM10:
22105 case PROCESSOR_NOCONA:
22106 case PROCESSOR_GENERIC32:
22107 case PROCESSOR_GENERIC64:
22108 case PROCESSOR_BDVER1:
22109 case PROCESSOR_BDVER2:
22110 case PROCESSOR_BTVER1:
22118 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
22119 by DEP_INSN and nothing else set by DEP_INSN. */
22122 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22126 /* Simplify the test for uninteresting insns. */
22127 if (insn_type != TYPE_SETCC
22128 && insn_type != TYPE_ICMOV
22129 && insn_type != TYPE_FCMOV
22130 && insn_type != TYPE_IBR)
22133 if ((set = single_set (dep_insn)) != 0)
22135 set = SET_DEST (set);
22138 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22139 && XVECLEN (PATTERN (dep_insn), 0) == 2
22140 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22141 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22143 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22144 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22149 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22152 /* This test is true if the dependent insn reads the flags but
22153 not any other potentially set register. */
22154 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22157 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22163 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN. */
22167 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22170 extract_insn_cached (use_insn);
22171 for (i = recog_data.n_operands - 1; i >= 0; --i)
22172 if (MEM_P (recog_data.operand[i]))
22174 rtx addr = XEXP (recog_data.operand[i], 0);
22175 return modified_in_p (addr, set_insn) != 0;
22181 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22183 enum attr_type insn_type, dep_insn_type;
22184 enum attr_memory memory;
22186 int dep_insn_code_number;
22188 /* Anti and output dependencies have zero cost on all CPUs. */
22189 if (REG_NOTE_KIND (link) != 0)
22192 dep_insn_code_number = recog_memoized (dep_insn);
22194 /* If we can't recognize the insns, we can't really do anything. */
22195 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22198 insn_type = get_attr_type (insn);
22199 dep_insn_type = get_attr_type (dep_insn);
22203 case PROCESSOR_PENTIUM:
22204 /* Address Generation Interlock adds a cycle of latency. */
22205 if (insn_type == TYPE_LEA)
22207 rtx addr = PATTERN (insn);
22209 if (GET_CODE (addr) == PARALLEL)
22210 addr = XVECEXP (addr, 0, 0);
22212 gcc_assert (GET_CODE (addr) == SET);
22214 addr = SET_SRC (addr);
22215 if (modified_in_p (addr, dep_insn))
22218 else if (ix86_agi_dependent (dep_insn, insn))
22221 /* ??? Compares pair with jump/setcc. */
22222 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22225 /* Floating point stores require the value to be ready one cycle earlier. */
22226 if (insn_type == TYPE_FMOV
22227 && get_attr_memory (insn) == MEMORY_STORE
22228 && !ix86_agi_dependent (dep_insn, insn))
22232 case PROCESSOR_PENTIUMPRO:
22233 memory = get_attr_memory (insn);
22235 /* INT->FP conversion is expensive. */
22236 if (get_attr_fp_int_src (dep_insn))
22239 /* There is one cycle extra latency between an FP op and a store. */
22240 if (insn_type == TYPE_FMOV
22241 && (set = single_set (dep_insn)) != NULL_RTX
22242 && (set2 = single_set (insn)) != NULL_RTX
22243 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22244 && MEM_P (SET_DEST (set2)))
22247 /* Show the ability of the reorder buffer to hide the latency of a load
22248 by executing it in parallel with the previous instruction when the
22249 previous instruction is not needed to compute the address. */
22250 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22251 && !ix86_agi_dependent (dep_insn, insn))
22253 /* Claim moves to take one cycle, as the core can issue one load
22254 at a time and the next load can start a cycle later. */
22255 if (dep_insn_type == TYPE_IMOV
22256 || dep_insn_type == TYPE_FMOV)
22264 memory = get_attr_memory (insn);
22266 /* The esp dependency is resolved before the instruction is really finished. */
22268 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22269 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22272 /* INT->FP conversion is expensive. */
22273 if (get_attr_fp_int_src (dep_insn))
22276 /* Show the ability of the reorder buffer to hide the latency of a load
22277 by executing it in parallel with the previous instruction when the
22278 previous instruction is not needed to compute the address. */
22279 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22280 && !ix86_agi_dependent (dep_insn, insn))
22282 /* Claim moves to take one cycle, as the core can issue one load
22283 at a time and the next load can start a cycle later. */
22284 if (dep_insn_type == TYPE_IMOV
22285 || dep_insn_type == TYPE_FMOV)
22294 case PROCESSOR_ATHLON:
22296 case PROCESSOR_AMDFAM10:
22297 case PROCESSOR_BDVER1:
22298 case PROCESSOR_BDVER2:
22299 case PROCESSOR_BTVER1:
22300 case PROCESSOR_ATOM:
22301 case PROCESSOR_GENERIC32:
22302 case PROCESSOR_GENERIC64:
22303 memory = get_attr_memory (insn);
22305 /* Show the ability of the reorder buffer to hide the latency of a load
22306 by executing it in parallel with the previous instruction when the
22307 previous instruction is not needed to compute the address. */
22308 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22309 && !ix86_agi_dependent (dep_insn, insn))
22311 enum attr_unit unit = get_attr_unit (insn);
22314 /* Because of the difference between the length of integer and
22315 floating unit pipeline preparation stages, the memory operands
22316 for floating point are cheaper.
22318 ??? For Athlon the difference is most probably 2. */
22319 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22322 loadcost = TARGET_ATHLON ? 2 : 0;
22324 if (cost >= loadcost)
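/* Illustrative sketch, not from the GCC sources: the load-latency
   adjustment just above in isolation.  When the address of the dependent
   load does not depend on the producing insn, the reorder buffer can
   hide LOADCOST cycles of the dependence.  The helper and its interface
   are hypothetical.  */

static int
example_hide_load_latency (int cost, int loadcost, int addr_independent)
{
  if (!addr_independent)
    return cost;  /* address generation interlock: nothing is hidden */
  return cost >= loadcost ? cost - loadcost : 0;
}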
22337 /* How many alternative schedules to try. This should be as wide as the
22338 scheduling freedom in the DFA, but no wider. Making this value too
22339 large results in extra work for the scheduler. */
22342 ia32_multipass_dfa_lookahead (void)
22346 case PROCESSOR_PENTIUM:
22349 case PROCESSOR_PENTIUMPRO:
22353 case PROCESSOR_CORE2_32:
22354 case PROCESSOR_CORE2_64:
22355 case PROCESSOR_COREI7_32:
22356 case PROCESSOR_COREI7_64:
22357 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22358 as many instructions can be executed on a cycle, i.e.,
22359 issue_rate. I wonder why tuning for many CPUs does not do this. */
22360 return ix86_issue_rate ();
22369 /* Model decoder of Core 2/i7.
22370 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
22371 track the instruction fetch block boundaries and make sure that long
22372 (9+ bytes) instructions are assigned to D0. */
22374 /* Maximum length of an insn that can be handled by
22375 a secondary decoder unit. '8' for Core 2/i7. */
22376 static int core2i7_secondary_decoder_max_insn_size;
22378 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22379 '16' for Core 2/i7. */
22380 static int core2i7_ifetch_block_size;
22382 /* Maximum number of instructions decoder can handle per cycle.
22383 '6' for Core 2/i7. */
22384 static int core2i7_ifetch_block_max_insns;
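/* Illustrative sketch, not from the GCC sources: the per-cycle
   bookkeeping these three parameters feed, using the Core 2/i7 values
   installed below (8-byte secondary-decoder limit, 16-byte ifetch block,
   at most 6 insns decoded per cycle).  The struct and helper are
   hypothetical.  */

struct example_decoder_state
{
  int block_len;      /* bytes already consumed in this ifetch block */
  int block_n_insns;  /* insns already decoded this cycle */
};

static int
example_decoder_can_issue (const struct example_decoder_state *s,
                           int insn_size, int first_insn_of_cycle)
{
  if (!first_insn_of_cycle && insn_size > 8)
    return 0;  /* too long for a secondary decoder; must go to D0 */
  if (s->block_len + insn_size > 16)
    return 0;  /* would spill out of the current ifetch block */
  if (s->block_n_insns + 1 > 6)
    return 0;  /* all decoder slots used this cycle */
  return 1;
}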
22386 typedef struct ix86_first_cycle_multipass_data_ *
22387 ix86_first_cycle_multipass_data_t;
22388 typedef const struct ix86_first_cycle_multipass_data_ *
22389 const_ix86_first_cycle_multipass_data_t;
22391 /* A variable to store target state across calls to max_issue within
22393 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22394 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22396 /* Initialize DATA. */
22398 core2i7_first_cycle_multipass_init (void *_data)
22400 ix86_first_cycle_multipass_data_t data
22401 = (ix86_first_cycle_multipass_data_t) _data;
22403 data->ifetch_block_len = 0;
22404 data->ifetch_block_n_insns = 0;
22405 data->ready_try_change = NULL;
22406 data->ready_try_change_size = 0;
22409 /* Advancing the cycle; reset ifetch block counts. */
22411 core2i7_dfa_post_advance_cycle (void)
22413 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22415 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22417 data->ifetch_block_len = 0;
22418 data->ifetch_block_n_insns = 0;
22421 static int min_insn_size (rtx);
22423 /* Filter out insns from ready_try that the core will not be able to issue
22424 on the current cycle due to decoder restrictions. */
22426 core2i7_first_cycle_multipass_filter_ready_try
22427 (const_ix86_first_cycle_multipass_data_t data,
22428 char *ready_try, int n_ready, bool first_cycle_insn_p)
22435 if (ready_try[n_ready])
22438 insn = get_ready_element (n_ready);
22439 insn_size = min_insn_size (insn);
22441 if (/* If this insn is too long for a secondary decoder ... */
22442 (!first_cycle_insn_p
22443 && insn_size > core2i7_secondary_decoder_max_insn_size)
22444 /* ... or it would not fit into the ifetch block ... */
22445 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22446 /* ... or the decoder is full already ... */
22447 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22448 /* ... mask the insn out. */
22450 ready_try[n_ready] = 1;
22452 if (data->ready_try_change)
22453 SET_BIT (data->ready_try_change, n_ready);
22458 /* Prepare for a new round of multipass lookahead scheduling. */
22460 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22461 bool first_cycle_insn_p)
22463 ix86_first_cycle_multipass_data_t data
22464 = (ix86_first_cycle_multipass_data_t) _data;
22465 const_ix86_first_cycle_multipass_data_t prev_data
22466 = ix86_first_cycle_multipass_data;
22468 /* Restore the state from the end of the previous round. */
22469 data->ifetch_block_len = prev_data->ifetch_block_len;
22470 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22472 /* Filter instructions that cannot be issued on current cycle due to
22473 decoder restrictions. */
22474 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22475 first_cycle_insn_p);
22478 /* INSN is being issued in the current solution. Account for its impact on
22479 the decoder model. */
22481 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22482 rtx insn, const void *_prev_data)
22484 ix86_first_cycle_multipass_data_t data
22485 = (ix86_first_cycle_multipass_data_t) _data;
22486 const_ix86_first_cycle_multipass_data_t prev_data
22487 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22489 int insn_size = min_insn_size (insn);
22491 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22492 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22493 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22494 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22496 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22497 if (!data->ready_try_change)
22499 data->ready_try_change = sbitmap_alloc (n_ready);
22500 data->ready_try_change_size = n_ready;
22502 else if (data->ready_try_change_size < n_ready)
22504 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22506 data->ready_try_change_size = n_ready;
22508 sbitmap_zero (data->ready_try_change);
22510 /* Filter out insns from ready_try that the core will not be able to issue
22511 on the current cycle due to decoder restrictions. */
22512 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22516 /* Revert the effect on ready_try. */
22518 core2i7_first_cycle_multipass_backtrack (const void *_data,
22520 int n_ready ATTRIBUTE_UNUSED)
22522 const_ix86_first_cycle_multipass_data_t data
22523 = (const_ix86_first_cycle_multipass_data_t) _data;
22524 unsigned int i = 0;
22525 sbitmap_iterator sbi;
22527 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22528 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22534 /* Save the result of multipass lookahead scheduling for the next round. */
22536 core2i7_first_cycle_multipass_end (const void *_data)
22538 const_ix86_first_cycle_multipass_data_t data
22539 = (const_ix86_first_cycle_multipass_data_t) _data;
22540 ix86_first_cycle_multipass_data_t next_data
22541 = ix86_first_cycle_multipass_data;
22545 next_data->ifetch_block_len = data->ifetch_block_len;
22546 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22550 /* Deallocate target data. */
22552 core2i7_first_cycle_multipass_fini (void *_data)
22554 ix86_first_cycle_multipass_data_t data
22555 = (ix86_first_cycle_multipass_data_t) _data;
22557 if (data->ready_try_change)
22559 sbitmap_free (data->ready_try_change);
22560 data->ready_try_change = NULL;
22561 data->ready_try_change_size = 0;
22565 /* Prepare for scheduling pass. */
22567 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22568 int verbose ATTRIBUTE_UNUSED,
22569 int max_uid ATTRIBUTE_UNUSED)
22571 /* Install scheduling hooks for the current CPU. Some of these hooks are used
22572 in time-critical parts of the scheduler, so we only set them up when
22573 they are actually used. */
22576 case PROCESSOR_CORE2_32:
22577 case PROCESSOR_CORE2_64:
22578 case PROCESSOR_COREI7_32:
22579 case PROCESSOR_COREI7_64:
22580 targetm.sched.dfa_post_advance_cycle
22581 = core2i7_dfa_post_advance_cycle;
22582 targetm.sched.first_cycle_multipass_init
22583 = core2i7_first_cycle_multipass_init;
22584 targetm.sched.first_cycle_multipass_begin
22585 = core2i7_first_cycle_multipass_begin;
22586 targetm.sched.first_cycle_multipass_issue
22587 = core2i7_first_cycle_multipass_issue;
22588 targetm.sched.first_cycle_multipass_backtrack
22589 = core2i7_first_cycle_multipass_backtrack;
22590 targetm.sched.first_cycle_multipass_end
22591 = core2i7_first_cycle_multipass_end;
22592 targetm.sched.first_cycle_multipass_fini
22593 = core2i7_first_cycle_multipass_fini;
22595 /* Set decoder parameters. */
22596 core2i7_secondary_decoder_max_insn_size = 8;
22597 core2i7_ifetch_block_size = 16;
22598 core2i7_ifetch_block_max_insns = 6;
22602 targetm.sched.dfa_post_advance_cycle = NULL;
22603 targetm.sched.first_cycle_multipass_init = NULL;
22604 targetm.sched.first_cycle_multipass_begin = NULL;
22605 targetm.sched.first_cycle_multipass_issue = NULL;
22606 targetm.sched.first_cycle_multipass_backtrack = NULL;
22607 targetm.sched.first_cycle_multipass_end = NULL;
22608 targetm.sched.first_cycle_multipass_fini = NULL;
22614 /* Compute the alignment given to a constant that is being placed in memory.
22615 EXP is the constant and ALIGN is the alignment that the object would ordinarily have.
22617 The value of this function is used instead of that alignment to align the object. */
22621 ix86_constant_alignment (tree exp, int align)
22623 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22624 || TREE_CODE (exp) == INTEGER_CST)
22626 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22628 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22631 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22632 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22633 return BITS_PER_WORD;
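/* Illustrative sketch, not from the GCC sources: the alignment bumps
   applied above, with the tree checks replaced by hypothetical flags.
   ALIGN and the result are in bits.  */

static int
example_constant_alignment (int is_double, int is_128bit_mode,
                            int is_long_string, int align)
{
  if (is_double && align < 64)
    return 64;    /* DFmode constants: 8-byte alignment */
  if (is_128bit_mode && align < 128)
    return 128;   /* 128-bit modes: 16-byte alignment */
  if (is_long_string && align < 32)
    return 32;    /* long strings: word alignment on IA-32 */
  return align;
}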
22638 /* Compute the alignment for a static variable.
22639 TYPE is the data type, and ALIGN is the alignment that
22640 the object would ordinarily have. The value of this function is used
22641 instead of that alignment to align the object. */
22644 ix86_data_alignment (tree type, int align)
22646 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22648 if (AGGREGATE_TYPE_P (type)
22649 && TYPE_SIZE (type)
22650 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22651 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22652 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22653 && align < max_align)
22656 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
22657 to a 16-byte boundary. */
22660 if (AGGREGATE_TYPE_P (type)
22661 && TYPE_SIZE (type)
22662 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22663 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22664 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22668 if (TREE_CODE (type) == ARRAY_TYPE)
22670 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22672 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22675 else if (TREE_CODE (type) == COMPLEX_TYPE)
22678 if (TYPE_MODE (type) == DCmode && align < 64)
22680 if ((TYPE_MODE (type) == XCmode
22681 || TYPE_MODE (type) == TCmode) && align < 128)
22684 else if ((TREE_CODE (type) == RECORD_TYPE
22685 || TREE_CODE (type) == UNION_TYPE
22686 || TREE_CODE (type) == QUAL_UNION_TYPE)
22687 && TYPE_FIELDS (type))
22689 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22691 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22694 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22695 || TREE_CODE (type) == INTEGER_TYPE)
22697 if (TYPE_MODE (type) == DFmode && align < 64)
22699 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22706 /* Compute the alignment for a local variable or a stack slot. EXP is
22707 the data type or decl itself, MODE is the widest mode available and
22708 ALIGN is the alignment that the object would ordinarily have. The
22709 value of this macro is used instead of that alignment to align the object. */
22713 ix86_local_alignment (tree exp, enum machine_mode mode,
22714 unsigned int align)
22718 if (exp && DECL_P (exp))
22720 type = TREE_TYPE (exp);
22729 /* Don't do dynamic stack realignment for long long objects with
22730 -mpreferred-stack-boundary=2. */
22733 && ix86_preferred_stack_boundary < 64
22734 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
22735 && (!type || !TYPE_USER_ALIGN (type))
22736 && (!decl || !DECL_USER_ALIGN (decl)))
22739 /* If TYPE is NULL, we are allocating a stack slot for caller-save
22740 register in MODE. We will return the largest alignment of XF and DF. */
22744 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
22745 align = GET_MODE_ALIGNMENT (DFmode);
22749 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
22750 to a 16-byte boundary. The exact wording is:
22752 An array uses the same alignment as its elements, except that a local or
22753 global array variable of length at least 16 bytes or
22754 a C99 variable-length array variable always has alignment of at least 16 bytes.
22756 This was added to allow use of aligned SSE instructions on arrays. This
22757 rule is meant for static storage (where the compiler cannot do the analysis
22758 by itself). We follow it for automatic variables only when convenient.
22759 We fully control everything in the function being compiled, and functions
22760 from other units cannot rely on the alignment.
22762 Exclude the va_list type. It is the common case of a local array where
22763 we cannot benefit from the alignment. */
22764 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
22767 if (AGGREGATE_TYPE_P (type)
22768 && (va_list_type_node == NULL_TREE
22769 || (TYPE_MAIN_VARIANT (type)
22770 != TYPE_MAIN_VARIANT (va_list_type_node)))
22771 && TYPE_SIZE (type)
22772 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22773 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
22774 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22777 if (TREE_CODE (type) == ARRAY_TYPE)
22779 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22781 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22784 else if (TREE_CODE (type) == COMPLEX_TYPE)
22786 if (TYPE_MODE (type) == DCmode && align < 64)
22788 if ((TYPE_MODE (type) == XCmode
22789 || TYPE_MODE (type) == TCmode) && align < 128)
22792 else if ((TREE_CODE (type) == RECORD_TYPE
22793 || TREE_CODE (type) == UNION_TYPE
22794 || TREE_CODE (type) == QUAL_UNION_TYPE)
22795 && TYPE_FIELDS (type))
22797 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22799 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22802 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22803 || TREE_CODE (type) == INTEGER_TYPE)
22806 if (TYPE_MODE (type) == DFmode && align < 64)
22808 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
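/* Illustrative sketch, not from the GCC sources: the x86-64 array rule
   quoted above, in isolation.  SIZE is in bytes, alignments in bits;
   the helper is hypothetical.  */

static int
example_array_alignment_64 (unsigned long long size, int align)
{
  /* Arrays of 16 bytes or more get at least 16-byte alignment, so
     aligned SSE loads and stores can be used on them.  */
  if (size >= 16 && align < 128)
    return 128;
  return align;
}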
22814 /* Compute the minimum required alignment for dynamic stack realignment
22815 purposes for a local variable, parameter or a stack slot. EXP is
22816 the data type or decl itself, MODE is its mode and ALIGN is the
22817 alignment that the object would ordinarily have. */
22820 ix86_minimum_alignment (tree exp, enum machine_mode mode,
22821 unsigned int align)
22825 if (exp && DECL_P (exp))
22827 type = TREE_TYPE (exp);
22836 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
22839 /* Don't do dynamic stack realignment for long long objects with
22840 -mpreferred-stack-boundary=2. */
22841 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
22842 && (!type || !TYPE_USER_ALIGN (type))
22843 && (!decl || !DECL_USER_ALIGN (decl)))
22849 /* Find a location for the static chain incoming to a nested function.
22850 This is a register, unless all free registers are used by arguments. */
22853 ix86_static_chain (const_tree fndecl, bool incoming_p)
22857 if (!DECL_STATIC_CHAIN (fndecl))
22862 /* We always use R10 in 64-bit mode. */
22870 /* By default in 32-bit mode we use ECX to pass the static chain. */
22873 fntype = TREE_TYPE (fndecl);
22874 ccvt = ix86_get_callcvt (fntype);
22875 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
22877 /* Fastcall functions use ecx/edx for arguments, which leaves
22878 us with EAX for the static chain.
22879 Thiscall functions use ecx for arguments, which also
22880 leaves us with EAX for the static chain. */
22883 else if (ix86_function_regparm (fntype, fndecl) == 3)
22885 /* For regparm 3, we have no free call-clobbered registers in
22886 which to store the static chain. In order to implement this,
22887 we have the trampoline push the static chain to the stack.
22888 However, we can't push a value below the return address when
22889 we call the nested function directly, so we have to use an
22890 alternate entry point. For this we use ESI, and have the
22891 alternate entry point push ESI, so that things appear the
22892 same once we're executing the nested function. */
22895 if (fndecl == current_function_decl)
22896 ix86_static_chain_on_stack = true;
22897 return gen_frame_mem (SImode,
22898 plus_constant (arg_pointer_rtx, -8));
22904 return gen_rtx_REG (Pmode, regno);
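/* Illustrative sketch, not from the GCC sources: the 32-bit static-chain
   choice above as a plain decision table.  The helper and the encoding
   of its result are hypothetical: 0 stands for EAX, 2 for ECX, and -1
   for "push the chain on the stack".  */

static int
example_static_chain_reg_32 (int fastcall_or_thiscall, int regparm)
{
  if (fastcall_or_thiscall)
    return 0;   /* ecx (and edx) carry arguments, so EAX is free */
  if (regparm == 3)
    return -1;  /* no free call-clobbered register remains */
  return 2;     /* ECX by default */
}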
22907 /* Emit RTL insns to initialize the variable parts of a trampoline.
22908 FNDECL is the decl of the target address; M_TRAMP is a MEM for
22909 the trampoline, and CHAIN_VALUE is an RTX for the static chain
22910 to be passed to the target function. */
22913 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
22919 fnaddr = XEXP (DECL_RTL (fndecl), 0);
22925 /* Load the function address to r11. Try to load address using
22926 the shorter movl instead of movabs. We may want to support
22927 movq for kernel mode, but the kernel does not use trampolines at the moment. */
22929 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
22931 fnaddr = copy_to_mode_reg (DImode, fnaddr);
22933 mem = adjust_address (m_tramp, HImode, offset);
22934 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
22936 mem = adjust_address (m_tramp, SImode, offset + 2);
22937 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
22942 mem = adjust_address (m_tramp, HImode, offset);
22943 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
22945 mem = adjust_address (m_tramp, DImode, offset + 2);
22946 emit_move_insn (mem, fnaddr);
22950 /* Load static chain using movabs to r10. Use the
22951 shorter movl instead of movabs for x32. */
22963 mem = adjust_address (m_tramp, HImode, offset);
22964 emit_move_insn (mem, gen_int_mode (opcode, HImode));
22966 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
22967 emit_move_insn (mem, chain_value);
22970 /* Jump to r11; the last (unused) byte is a nop, only there to
22971 pad the write out to a single 32-bit store. */
22972 mem = adjust_address (m_tramp, SImode, offset);
22973 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
22980 /* Depending on the static chain location, either load a register
22981 with a constant, or push the constant to the stack. All of the
22982 instructions are the same size. */
22983 chain = ix86_static_chain (fndecl, true);
22986 switch (REGNO (chain))
22989 opcode = 0xb8; break;
22991 opcode = 0xb9; break;
22993 gcc_unreachable ();
22999 mem = adjust_address (m_tramp, QImode, offset);
23000 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23002 mem = adjust_address (m_tramp, SImode, offset + 1);
23003 emit_move_insn (mem, chain_value);
23006 mem = adjust_address (m_tramp, QImode, offset);
23007 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23009 mem = adjust_address (m_tramp, SImode, offset + 1);
23011 /* Compute offset from the end of the jmp to the target function.
23012 In the case in which the trampoline stores the static chain on
23013 the stack, we need to skip the first insn which pushes the
23014 (call-saved) register static chain; this push is 1 byte. */
23016 disp = expand_binop (SImode, sub_optab, fnaddr,
23017 plus_constant (XEXP (m_tramp, 0),
23018 offset - (MEM_P (chain) ? 1 : 0)),
23019 NULL_RTX, 1, OPTAB_DIRECT);
23020 emit_move_insn (mem, disp);
23023 gcc_assert (offset <= TRAMPOLINE_SIZE);
23025 #ifdef HAVE_ENABLE_EXECUTE_STACK
23026 #ifdef CHECK_EXECUTE_STACK_ENABLED
23027 if (CHECK_EXECUTE_STACK_ENABLED)
23029 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23030 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
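/* Illustrative sketch, not from the GCC sources: the 64-bit trampoline
   byte layout emitted above, written into a plain buffer.  The helper
   is hypothetical, assumes a little-endian host, and always uses the
   long movabs forms rather than the movl shortening the real code
   attempts.  */

#include <stdint.h>
#include <string.h>

static size_t
example_tramp64_bytes (unsigned char *buf, uint64_t fnaddr, uint64_t chain)
{
  size_t off = 0;

  buf[off++] = 0x49; buf[off++] = 0xbb;  /* movabs $fnaddr, %r11 */
  memcpy (buf + off, &fnaddr, 8); off += 8;

  buf[off++] = 0x49; buf[off++] = 0xba;  /* movabs $chain, %r10 */
  memcpy (buf + off, &chain, 8); off += 8;

  buf[off++] = 0x49; buf[off++] = 0xff;  /* jmp *%r11 */
  buf[off++] = 0xe3;
  buf[off++] = 0x90;                     /* nop, padding the last store */

  return off;  /* 24 bytes in total */
}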
23034 /* The following file contains several enumerations and data structures
23035 built from the definitions in i386-builtin-types.def. */
23037 #include "i386-builtin-types.inc"
23039 /* Table for the ix86 builtin non-function types. */
23040 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23042 /* Retrieve an element from the above table, building some of
23043 the types lazily. */
23046 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23048 unsigned int index;
23051 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23053 type = ix86_builtin_type_tab[(int) tcode];
23057 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23058 if (tcode <= IX86_BT_LAST_VECT)
23060 enum machine_mode mode;
23062 index = tcode - IX86_BT_LAST_PRIM - 1;
23063 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23064 mode = ix86_builtin_type_vect_mode[index];
23066 type = build_vector_type_for_mode (itype, mode);
23072 index = tcode - IX86_BT_LAST_VECT - 1;
23073 if (tcode <= IX86_BT_LAST_PTR)
23074 quals = TYPE_UNQUALIFIED;
23076 quals = TYPE_QUAL_CONST;
23078 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23079 if (quals != TYPE_UNQUALIFIED)
23080 itype = build_qualified_type (itype, quals);
23082 type = build_pointer_type (itype);
23085 ix86_builtin_type_tab[(int) tcode] = type;
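/* Illustrative sketch, not from the GCC sources: the build-on-first-use
   pattern above, reduced to strings.  The table, its size and the BUILD
   callback are hypothetical.  */

#include <stddef.h>

#define EXAMPLE_N_TYPES 8

static const char *example_type_tab[EXAMPLE_N_TYPES];

static const char *
example_get_type (unsigned int tcode, const char *(*build) (unsigned int))
{
  if (tcode >= EXAMPLE_N_TYPES)
    return NULL;  /* mirrors the gcc_assert range check above */
  if (example_type_tab[tcode] == NULL)
    example_type_tab[tcode] = build (tcode);  /* built lazily, then cached */
  return example_type_tab[tcode];
}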
23089 /* Table for the ix86 builtin function types. */
23090 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23092 /* Retrieve an element from the above table, building some of
23093 the types lazily. */
23096 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23100 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23102 type = ix86_builtin_func_type_tab[(int) tcode];
23106 if (tcode <= IX86_BT_LAST_FUNC)
23108 unsigned start = ix86_builtin_func_start[(int) tcode];
23109 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23110 tree rtype, atype, args = void_list_node;
23113 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23114 for (i = after - 1; i > start; --i)
23116 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23117 args = tree_cons (NULL, atype, args);
23120 type = build_function_type (rtype, args);
23124 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23125 enum ix86_builtin_func_type icode;
23127 icode = ix86_builtin_func_alias_base[index];
23128 type = ix86_get_builtin_func_type (icode);
23131 ix86_builtin_func_type_tab[(int) tcode] = type;
23136 /* Codes for all the SSE/MMX builtins. */
23139 IX86_BUILTIN_ADDPS,
23140 IX86_BUILTIN_ADDSS,
23141 IX86_BUILTIN_DIVPS,
23142 IX86_BUILTIN_DIVSS,
23143 IX86_BUILTIN_MULPS,
23144 IX86_BUILTIN_MULSS,
23145 IX86_BUILTIN_SUBPS,
23146 IX86_BUILTIN_SUBSS,
23148 IX86_BUILTIN_CMPEQPS,
23149 IX86_BUILTIN_CMPLTPS,
23150 IX86_BUILTIN_CMPLEPS,
23151 IX86_BUILTIN_CMPGTPS,
23152 IX86_BUILTIN_CMPGEPS,
23153 IX86_BUILTIN_CMPNEQPS,
23154 IX86_BUILTIN_CMPNLTPS,
23155 IX86_BUILTIN_CMPNLEPS,
23156 IX86_BUILTIN_CMPNGTPS,
23157 IX86_BUILTIN_CMPNGEPS,
23158 IX86_BUILTIN_CMPORDPS,
23159 IX86_BUILTIN_CMPUNORDPS,
23160 IX86_BUILTIN_CMPEQSS,
23161 IX86_BUILTIN_CMPLTSS,
23162 IX86_BUILTIN_CMPLESS,
23163 IX86_BUILTIN_CMPNEQSS,
23164 IX86_BUILTIN_CMPNLTSS,
23165 IX86_BUILTIN_CMPNLESS,
23166 IX86_BUILTIN_CMPNGTSS,
23167 IX86_BUILTIN_CMPNGESS,
23168 IX86_BUILTIN_CMPORDSS,
23169 IX86_BUILTIN_CMPUNORDSS,
23171 IX86_BUILTIN_COMIEQSS,
23172 IX86_BUILTIN_COMILTSS,
23173 IX86_BUILTIN_COMILESS,
23174 IX86_BUILTIN_COMIGTSS,
23175 IX86_BUILTIN_COMIGESS,
23176 IX86_BUILTIN_COMINEQSS,
23177 IX86_BUILTIN_UCOMIEQSS,
23178 IX86_BUILTIN_UCOMILTSS,
23179 IX86_BUILTIN_UCOMILESS,
23180 IX86_BUILTIN_UCOMIGTSS,
23181 IX86_BUILTIN_UCOMIGESS,
23182 IX86_BUILTIN_UCOMINEQSS,
23184 IX86_BUILTIN_CVTPI2PS,
23185 IX86_BUILTIN_CVTPS2PI,
23186 IX86_BUILTIN_CVTSI2SS,
23187 IX86_BUILTIN_CVTSI642SS,
23188 IX86_BUILTIN_CVTSS2SI,
23189 IX86_BUILTIN_CVTSS2SI64,
23190 IX86_BUILTIN_CVTTPS2PI,
23191 IX86_BUILTIN_CVTTSS2SI,
23192 IX86_BUILTIN_CVTTSS2SI64,
23194 IX86_BUILTIN_MAXPS,
23195 IX86_BUILTIN_MAXSS,
23196 IX86_BUILTIN_MINPS,
23197 IX86_BUILTIN_MINSS,
23199 IX86_BUILTIN_LOADUPS,
23200 IX86_BUILTIN_STOREUPS,
23201 IX86_BUILTIN_MOVSS,
23203 IX86_BUILTIN_MOVHLPS,
23204 IX86_BUILTIN_MOVLHPS,
23205 IX86_BUILTIN_LOADHPS,
23206 IX86_BUILTIN_LOADLPS,
23207 IX86_BUILTIN_STOREHPS,
23208 IX86_BUILTIN_STORELPS,
23210 IX86_BUILTIN_MASKMOVQ,
23211 IX86_BUILTIN_MOVMSKPS,
23212 IX86_BUILTIN_PMOVMSKB,
23214 IX86_BUILTIN_MOVNTPS,
23215 IX86_BUILTIN_MOVNTQ,
23217 IX86_BUILTIN_LOADDQU,
23218 IX86_BUILTIN_STOREDQU,
23220 IX86_BUILTIN_PACKSSWB,
23221 IX86_BUILTIN_PACKSSDW,
23222 IX86_BUILTIN_PACKUSWB,
23224 IX86_BUILTIN_PADDB,
23225 IX86_BUILTIN_PADDW,
23226 IX86_BUILTIN_PADDD,
23227 IX86_BUILTIN_PADDQ,
23228 IX86_BUILTIN_PADDSB,
23229 IX86_BUILTIN_PADDSW,
23230 IX86_BUILTIN_PADDUSB,
23231 IX86_BUILTIN_PADDUSW,
23232 IX86_BUILTIN_PSUBB,
23233 IX86_BUILTIN_PSUBW,
23234 IX86_BUILTIN_PSUBD,
23235 IX86_BUILTIN_PSUBQ,
23236 IX86_BUILTIN_PSUBSB,
23237 IX86_BUILTIN_PSUBSW,
23238 IX86_BUILTIN_PSUBUSB,
23239 IX86_BUILTIN_PSUBUSW,
23242 IX86_BUILTIN_PANDN,
23246 IX86_BUILTIN_PAVGB,
23247 IX86_BUILTIN_PAVGW,
23249 IX86_BUILTIN_PCMPEQB,
23250 IX86_BUILTIN_PCMPEQW,
23251 IX86_BUILTIN_PCMPEQD,
23252 IX86_BUILTIN_PCMPGTB,
23253 IX86_BUILTIN_PCMPGTW,
23254 IX86_BUILTIN_PCMPGTD,
23256 IX86_BUILTIN_PMADDWD,
23258 IX86_BUILTIN_PMAXSW,
23259 IX86_BUILTIN_PMAXUB,
23260 IX86_BUILTIN_PMINSW,
23261 IX86_BUILTIN_PMINUB,
23263 IX86_BUILTIN_PMULHUW,
23264 IX86_BUILTIN_PMULHW,
23265 IX86_BUILTIN_PMULLW,
23267 IX86_BUILTIN_PSADBW,
23268 IX86_BUILTIN_PSHUFW,
23270 IX86_BUILTIN_PSLLW,
23271 IX86_BUILTIN_PSLLD,
23272 IX86_BUILTIN_PSLLQ,
23273 IX86_BUILTIN_PSRAW,
23274 IX86_BUILTIN_PSRAD,
23275 IX86_BUILTIN_PSRLW,
23276 IX86_BUILTIN_PSRLD,
23277 IX86_BUILTIN_PSRLQ,
23278 IX86_BUILTIN_PSLLWI,
23279 IX86_BUILTIN_PSLLDI,
23280 IX86_BUILTIN_PSLLQI,
23281 IX86_BUILTIN_PSRAWI,
23282 IX86_BUILTIN_PSRADI,
23283 IX86_BUILTIN_PSRLWI,
23284 IX86_BUILTIN_PSRLDI,
23285 IX86_BUILTIN_PSRLQI,
23287 IX86_BUILTIN_PUNPCKHBW,
23288 IX86_BUILTIN_PUNPCKHWD,
23289 IX86_BUILTIN_PUNPCKHDQ,
23290 IX86_BUILTIN_PUNPCKLBW,
23291 IX86_BUILTIN_PUNPCKLWD,
23292 IX86_BUILTIN_PUNPCKLDQ,
23294 IX86_BUILTIN_SHUFPS,
23296 IX86_BUILTIN_RCPPS,
23297 IX86_BUILTIN_RCPSS,
23298 IX86_BUILTIN_RSQRTPS,
23299 IX86_BUILTIN_RSQRTPS_NR,
23300 IX86_BUILTIN_RSQRTSS,
23301 IX86_BUILTIN_RSQRTF,
23302 IX86_BUILTIN_SQRTPS,
23303 IX86_BUILTIN_SQRTPS_NR,
23304 IX86_BUILTIN_SQRTSS,
23306 IX86_BUILTIN_UNPCKHPS,
23307 IX86_BUILTIN_UNPCKLPS,
23309 IX86_BUILTIN_ANDPS,
23310 IX86_BUILTIN_ANDNPS,
23312 IX86_BUILTIN_XORPS,
23315 IX86_BUILTIN_LDMXCSR,
23316 IX86_BUILTIN_STMXCSR,
23317 IX86_BUILTIN_SFENCE,
23319 /* 3DNow! Original */
23320 IX86_BUILTIN_FEMMS,
23321 IX86_BUILTIN_PAVGUSB,
23322 IX86_BUILTIN_PF2ID,
23323 IX86_BUILTIN_PFACC,
23324 IX86_BUILTIN_PFADD,
23325 IX86_BUILTIN_PFCMPEQ,
23326 IX86_BUILTIN_PFCMPGE,
23327 IX86_BUILTIN_PFCMPGT,
23328 IX86_BUILTIN_PFMAX,
23329 IX86_BUILTIN_PFMIN,
23330 IX86_BUILTIN_PFMUL,
23331 IX86_BUILTIN_PFRCP,
23332 IX86_BUILTIN_PFRCPIT1,
23333 IX86_BUILTIN_PFRCPIT2,
23334 IX86_BUILTIN_PFRSQIT1,
23335 IX86_BUILTIN_PFRSQRT,
23336 IX86_BUILTIN_PFSUB,
23337 IX86_BUILTIN_PFSUBR,
23338 IX86_BUILTIN_PI2FD,
23339 IX86_BUILTIN_PMULHRW,
23341 /* 3DNow! Athlon Extensions */
23342 IX86_BUILTIN_PF2IW,
23343 IX86_BUILTIN_PFNACC,
23344 IX86_BUILTIN_PFPNACC,
23345 IX86_BUILTIN_PI2FW,
23346 IX86_BUILTIN_PSWAPDSI,
23347 IX86_BUILTIN_PSWAPDSF,
23350 IX86_BUILTIN_ADDPD,
23351 IX86_BUILTIN_ADDSD,
23352 IX86_BUILTIN_DIVPD,
23353 IX86_BUILTIN_DIVSD,
23354 IX86_BUILTIN_MULPD,
23355 IX86_BUILTIN_MULSD,
23356 IX86_BUILTIN_SUBPD,
23357 IX86_BUILTIN_SUBSD,
23359 IX86_BUILTIN_CMPEQPD,
23360 IX86_BUILTIN_CMPLTPD,
23361 IX86_BUILTIN_CMPLEPD,
23362 IX86_BUILTIN_CMPGTPD,
23363 IX86_BUILTIN_CMPGEPD,
23364 IX86_BUILTIN_CMPNEQPD,
23365 IX86_BUILTIN_CMPNLTPD,
23366 IX86_BUILTIN_CMPNLEPD,
23367 IX86_BUILTIN_CMPNGTPD,
23368 IX86_BUILTIN_CMPNGEPD,
23369 IX86_BUILTIN_CMPORDPD,
23370 IX86_BUILTIN_CMPUNORDPD,
23371 IX86_BUILTIN_CMPEQSD,
23372 IX86_BUILTIN_CMPLTSD,
23373 IX86_BUILTIN_CMPLESD,
23374 IX86_BUILTIN_CMPNEQSD,
23375 IX86_BUILTIN_CMPNLTSD,
23376 IX86_BUILTIN_CMPNLESD,
23377 IX86_BUILTIN_CMPORDSD,
23378 IX86_BUILTIN_CMPUNORDSD,
23380 IX86_BUILTIN_COMIEQSD,
23381 IX86_BUILTIN_COMILTSD,
23382 IX86_BUILTIN_COMILESD,
23383 IX86_BUILTIN_COMIGTSD,
23384 IX86_BUILTIN_COMIGESD,
23385 IX86_BUILTIN_COMINEQSD,
23386 IX86_BUILTIN_UCOMIEQSD,
23387 IX86_BUILTIN_UCOMILTSD,
23388 IX86_BUILTIN_UCOMILESD,
23389 IX86_BUILTIN_UCOMIGTSD,
23390 IX86_BUILTIN_UCOMIGESD,
23391 IX86_BUILTIN_UCOMINEQSD,
23393 IX86_BUILTIN_MAXPD,
23394 IX86_BUILTIN_MAXSD,
23395 IX86_BUILTIN_MINPD,
23396 IX86_BUILTIN_MINSD,
23398 IX86_BUILTIN_ANDPD,
23399 IX86_BUILTIN_ANDNPD,
23401 IX86_BUILTIN_XORPD,
23403 IX86_BUILTIN_SQRTPD,
23404 IX86_BUILTIN_SQRTSD,
23406 IX86_BUILTIN_UNPCKHPD,
23407 IX86_BUILTIN_UNPCKLPD,
23409 IX86_BUILTIN_SHUFPD,
23411 IX86_BUILTIN_LOADUPD,
23412 IX86_BUILTIN_STOREUPD,
23413 IX86_BUILTIN_MOVSD,
23415 IX86_BUILTIN_LOADHPD,
23416 IX86_BUILTIN_LOADLPD,
23418 IX86_BUILTIN_CVTDQ2PD,
23419 IX86_BUILTIN_CVTDQ2PS,
23421 IX86_BUILTIN_CVTPD2DQ,
23422 IX86_BUILTIN_CVTPD2PI,
23423 IX86_BUILTIN_CVTPD2PS,
23424 IX86_BUILTIN_CVTTPD2DQ,
23425 IX86_BUILTIN_CVTTPD2PI,
23427 IX86_BUILTIN_CVTPI2PD,
23428 IX86_BUILTIN_CVTSI2SD,
23429 IX86_BUILTIN_CVTSI642SD,
23431 IX86_BUILTIN_CVTSD2SI,
23432 IX86_BUILTIN_CVTSD2SI64,
23433 IX86_BUILTIN_CVTSD2SS,
23434 IX86_BUILTIN_CVTSS2SD,
23435 IX86_BUILTIN_CVTTSD2SI,
23436 IX86_BUILTIN_CVTTSD2SI64,
23438 IX86_BUILTIN_CVTPS2DQ,
23439 IX86_BUILTIN_CVTPS2PD,
23440 IX86_BUILTIN_CVTTPS2DQ,
23442 IX86_BUILTIN_MOVNTI,
23443 IX86_BUILTIN_MOVNTPD,
23444 IX86_BUILTIN_MOVNTDQ,
23446 IX86_BUILTIN_MOVQ128,
23449 IX86_BUILTIN_MASKMOVDQU,
23450 IX86_BUILTIN_MOVMSKPD,
23451 IX86_BUILTIN_PMOVMSKB128,
23453 IX86_BUILTIN_PACKSSWB128,
23454 IX86_BUILTIN_PACKSSDW128,
23455 IX86_BUILTIN_PACKUSWB128,
23457 IX86_BUILTIN_PADDB128,
23458 IX86_BUILTIN_PADDW128,
23459 IX86_BUILTIN_PADDD128,
23460 IX86_BUILTIN_PADDQ128,
23461 IX86_BUILTIN_PADDSB128,
23462 IX86_BUILTIN_PADDSW128,
23463 IX86_BUILTIN_PADDUSB128,
23464 IX86_BUILTIN_PADDUSW128,
23465 IX86_BUILTIN_PSUBB128,
23466 IX86_BUILTIN_PSUBW128,
23467 IX86_BUILTIN_PSUBD128,
23468 IX86_BUILTIN_PSUBQ128,
23469 IX86_BUILTIN_PSUBSB128,
23470 IX86_BUILTIN_PSUBSW128,
23471 IX86_BUILTIN_PSUBUSB128,
23472 IX86_BUILTIN_PSUBUSW128,
23474 IX86_BUILTIN_PAND128,
23475 IX86_BUILTIN_PANDN128,
23476 IX86_BUILTIN_POR128,
23477 IX86_BUILTIN_PXOR128,
23479 IX86_BUILTIN_PAVGB128,
23480 IX86_BUILTIN_PAVGW128,
23482 IX86_BUILTIN_PCMPEQB128,
23483 IX86_BUILTIN_PCMPEQW128,
23484 IX86_BUILTIN_PCMPEQD128,
23485 IX86_BUILTIN_PCMPGTB128,
23486 IX86_BUILTIN_PCMPGTW128,
23487 IX86_BUILTIN_PCMPGTD128,
23489 IX86_BUILTIN_PMADDWD128,
23491 IX86_BUILTIN_PMAXSW128,
23492 IX86_BUILTIN_PMAXUB128,
23493 IX86_BUILTIN_PMINSW128,
23494 IX86_BUILTIN_PMINUB128,
23496 IX86_BUILTIN_PMULUDQ,
23497 IX86_BUILTIN_PMULUDQ128,
23498 IX86_BUILTIN_PMULHUW128,
23499 IX86_BUILTIN_PMULHW128,
23500 IX86_BUILTIN_PMULLW128,
23502 IX86_BUILTIN_PSADBW128,
23503 IX86_BUILTIN_PSHUFHW,
23504 IX86_BUILTIN_PSHUFLW,
23505 IX86_BUILTIN_PSHUFD,
23507 IX86_BUILTIN_PSLLDQI128,
23508 IX86_BUILTIN_PSLLWI128,
23509 IX86_BUILTIN_PSLLDI128,
23510 IX86_BUILTIN_PSLLQI128,
23511 IX86_BUILTIN_PSRAWI128,
23512 IX86_BUILTIN_PSRADI128,
23513 IX86_BUILTIN_PSRLDQI128,
23514 IX86_BUILTIN_PSRLWI128,
23515 IX86_BUILTIN_PSRLDI128,
23516 IX86_BUILTIN_PSRLQI128,
23518 IX86_BUILTIN_PSLLDQ128,
23519 IX86_BUILTIN_PSLLW128,
23520 IX86_BUILTIN_PSLLD128,
23521 IX86_BUILTIN_PSLLQ128,
23522 IX86_BUILTIN_PSRAW128,
23523 IX86_BUILTIN_PSRAD128,
23524 IX86_BUILTIN_PSRLW128,
23525 IX86_BUILTIN_PSRLD128,
23526 IX86_BUILTIN_PSRLQ128,
23528 IX86_BUILTIN_PUNPCKHBW128,
23529 IX86_BUILTIN_PUNPCKHWD128,
23530 IX86_BUILTIN_PUNPCKHDQ128,
23531 IX86_BUILTIN_PUNPCKHQDQ128,
23532 IX86_BUILTIN_PUNPCKLBW128,
23533 IX86_BUILTIN_PUNPCKLWD128,
23534 IX86_BUILTIN_PUNPCKLDQ128,
23535 IX86_BUILTIN_PUNPCKLQDQ128,
23537 IX86_BUILTIN_CLFLUSH,
23538 IX86_BUILTIN_MFENCE,
23539 IX86_BUILTIN_LFENCE,
23540 IX86_BUILTIN_PAUSE,
23542 IX86_BUILTIN_BSRSI,
23543 IX86_BUILTIN_BSRDI,
23544 IX86_BUILTIN_RDPMC,
23545 IX86_BUILTIN_RDTSC,
23546 IX86_BUILTIN_RDTSCP,
23547 IX86_BUILTIN_ROLQI,
23548 IX86_BUILTIN_ROLHI,
23549 IX86_BUILTIN_RORQI,
23550 IX86_BUILTIN_RORHI,
23553 IX86_BUILTIN_ADDSUBPS,
23554 IX86_BUILTIN_HADDPS,
23555 IX86_BUILTIN_HSUBPS,
23556 IX86_BUILTIN_MOVSHDUP,
23557 IX86_BUILTIN_MOVSLDUP,
23558 IX86_BUILTIN_ADDSUBPD,
23559 IX86_BUILTIN_HADDPD,
23560 IX86_BUILTIN_HSUBPD,
23561 IX86_BUILTIN_LDDQU,
23563 IX86_BUILTIN_MONITOR,
23564 IX86_BUILTIN_MWAIT,
23567 IX86_BUILTIN_PHADDW,
23568 IX86_BUILTIN_PHADDD,
23569 IX86_BUILTIN_PHADDSW,
23570 IX86_BUILTIN_PHSUBW,
23571 IX86_BUILTIN_PHSUBD,
23572 IX86_BUILTIN_PHSUBSW,
23573 IX86_BUILTIN_PMADDUBSW,
23574 IX86_BUILTIN_PMULHRSW,
23575 IX86_BUILTIN_PSHUFB,
23576 IX86_BUILTIN_PSIGNB,
23577 IX86_BUILTIN_PSIGNW,
23578 IX86_BUILTIN_PSIGND,
23579 IX86_BUILTIN_PALIGNR,
23580 IX86_BUILTIN_PABSB,
23581 IX86_BUILTIN_PABSW,
23582 IX86_BUILTIN_PABSD,
23584 IX86_BUILTIN_PHADDW128,
23585 IX86_BUILTIN_PHADDD128,
23586 IX86_BUILTIN_PHADDSW128,
23587 IX86_BUILTIN_PHSUBW128,
23588 IX86_BUILTIN_PHSUBD128,
23589 IX86_BUILTIN_PHSUBSW128,
23590 IX86_BUILTIN_PMADDUBSW128,
23591 IX86_BUILTIN_PMULHRSW128,
23592 IX86_BUILTIN_PSHUFB128,
23593 IX86_BUILTIN_PSIGNB128,
23594 IX86_BUILTIN_PSIGNW128,
23595 IX86_BUILTIN_PSIGND128,
23596 IX86_BUILTIN_PALIGNR128,
23597 IX86_BUILTIN_PABSB128,
23598 IX86_BUILTIN_PABSW128,
23599 IX86_BUILTIN_PABSD128,
23601 /* AMDFAM10 - SSE4A New Instructions. */
23602 IX86_BUILTIN_MOVNTSD,
23603 IX86_BUILTIN_MOVNTSS,
23604 IX86_BUILTIN_EXTRQI,
23605 IX86_BUILTIN_EXTRQ,
23606 IX86_BUILTIN_INSERTQI,
23607 IX86_BUILTIN_INSERTQ,
23610 IX86_BUILTIN_BLENDPD,
23611 IX86_BUILTIN_BLENDPS,
23612 IX86_BUILTIN_BLENDVPD,
23613 IX86_BUILTIN_BLENDVPS,
23614 IX86_BUILTIN_PBLENDVB128,
23615 IX86_BUILTIN_PBLENDW128,
23620 IX86_BUILTIN_INSERTPS128,
23622 IX86_BUILTIN_MOVNTDQA,
23623 IX86_BUILTIN_MPSADBW128,
23624 IX86_BUILTIN_PACKUSDW128,
23625 IX86_BUILTIN_PCMPEQQ,
23626 IX86_BUILTIN_PHMINPOSUW128,
23628 IX86_BUILTIN_PMAXSB128,
23629 IX86_BUILTIN_PMAXSD128,
23630 IX86_BUILTIN_PMAXUD128,
23631 IX86_BUILTIN_PMAXUW128,
23633 IX86_BUILTIN_PMINSB128,
23634 IX86_BUILTIN_PMINSD128,
23635 IX86_BUILTIN_PMINUD128,
23636 IX86_BUILTIN_PMINUW128,
23638 IX86_BUILTIN_PMOVSXBW128,
23639 IX86_BUILTIN_PMOVSXBD128,
23640 IX86_BUILTIN_PMOVSXBQ128,
23641 IX86_BUILTIN_PMOVSXWD128,
23642 IX86_BUILTIN_PMOVSXWQ128,
23643 IX86_BUILTIN_PMOVSXDQ128,
23645 IX86_BUILTIN_PMOVZXBW128,
23646 IX86_BUILTIN_PMOVZXBD128,
23647 IX86_BUILTIN_PMOVZXBQ128,
23648 IX86_BUILTIN_PMOVZXWD128,
23649 IX86_BUILTIN_PMOVZXWQ128,
23650 IX86_BUILTIN_PMOVZXDQ128,
23652 IX86_BUILTIN_PMULDQ128,
23653 IX86_BUILTIN_PMULLD128,
23655 IX86_BUILTIN_ROUNDPD,
23656 IX86_BUILTIN_ROUNDPS,
23657 IX86_BUILTIN_ROUNDSD,
23658 IX86_BUILTIN_ROUNDSS,
23660 IX86_BUILTIN_FLOORPD,
23661 IX86_BUILTIN_CEILPD,
23662 IX86_BUILTIN_TRUNCPD,
23663 IX86_BUILTIN_RINTPD,
23664 IX86_BUILTIN_ROUNDPD_AZ,
23665 IX86_BUILTIN_FLOORPS,
23666 IX86_BUILTIN_CEILPS,
23667 IX86_BUILTIN_TRUNCPS,
23668 IX86_BUILTIN_RINTPS,
23669 IX86_BUILTIN_ROUNDPS_AZ,
23671 IX86_BUILTIN_PTESTZ,
23672 IX86_BUILTIN_PTESTC,
23673 IX86_BUILTIN_PTESTNZC,
23675 IX86_BUILTIN_VEC_INIT_V2SI,
23676 IX86_BUILTIN_VEC_INIT_V4HI,
23677 IX86_BUILTIN_VEC_INIT_V8QI,
23678 IX86_BUILTIN_VEC_EXT_V2DF,
23679 IX86_BUILTIN_VEC_EXT_V2DI,
23680 IX86_BUILTIN_VEC_EXT_V4SF,
23681 IX86_BUILTIN_VEC_EXT_V4SI,
23682 IX86_BUILTIN_VEC_EXT_V8HI,
23683 IX86_BUILTIN_VEC_EXT_V2SI,
23684 IX86_BUILTIN_VEC_EXT_V4HI,
23685 IX86_BUILTIN_VEC_EXT_V16QI,
23686 IX86_BUILTIN_VEC_SET_V2DI,
23687 IX86_BUILTIN_VEC_SET_V4SF,
23688 IX86_BUILTIN_VEC_SET_V4SI,
23689 IX86_BUILTIN_VEC_SET_V8HI,
23690 IX86_BUILTIN_VEC_SET_V4HI,
23691 IX86_BUILTIN_VEC_SET_V16QI,
23693 IX86_BUILTIN_VEC_PACK_SFIX,
23696 IX86_BUILTIN_CRC32QI,
23697 IX86_BUILTIN_CRC32HI,
23698 IX86_BUILTIN_CRC32SI,
23699 IX86_BUILTIN_CRC32DI,
23701 IX86_BUILTIN_PCMPESTRI128,
23702 IX86_BUILTIN_PCMPESTRM128,
23703 IX86_BUILTIN_PCMPESTRA128,
23704 IX86_BUILTIN_PCMPESTRC128,
23705 IX86_BUILTIN_PCMPESTRO128,
23706 IX86_BUILTIN_PCMPESTRS128,
23707 IX86_BUILTIN_PCMPESTRZ128,
23708 IX86_BUILTIN_PCMPISTRI128,
23709 IX86_BUILTIN_PCMPISTRM128,
23710 IX86_BUILTIN_PCMPISTRA128,
23711 IX86_BUILTIN_PCMPISTRC128,
23712 IX86_BUILTIN_PCMPISTRO128,
23713 IX86_BUILTIN_PCMPISTRS128,
23714 IX86_BUILTIN_PCMPISTRZ128,
23716 IX86_BUILTIN_PCMPGTQ,
23718 /* AES instructions */
23719 IX86_BUILTIN_AESENC128,
23720 IX86_BUILTIN_AESENCLAST128,
23721 IX86_BUILTIN_AESDEC128,
23722 IX86_BUILTIN_AESDECLAST128,
23723 IX86_BUILTIN_AESIMC128,
23724 IX86_BUILTIN_AESKEYGENASSIST128,
23726 /* PCLMUL instruction */
23727 IX86_BUILTIN_PCLMULQDQ128,
23730 IX86_BUILTIN_ADDPD256,
23731 IX86_BUILTIN_ADDPS256,
23732 IX86_BUILTIN_ADDSUBPD256,
23733 IX86_BUILTIN_ADDSUBPS256,
23734 IX86_BUILTIN_ANDPD256,
23735 IX86_BUILTIN_ANDPS256,
23736 IX86_BUILTIN_ANDNPD256,
23737 IX86_BUILTIN_ANDNPS256,
23738 IX86_BUILTIN_BLENDPD256,
23739 IX86_BUILTIN_BLENDPS256,
23740 IX86_BUILTIN_BLENDVPD256,
23741 IX86_BUILTIN_BLENDVPS256,
23742 IX86_BUILTIN_DIVPD256,
23743 IX86_BUILTIN_DIVPS256,
23744 IX86_BUILTIN_DPPS256,
23745 IX86_BUILTIN_HADDPD256,
23746 IX86_BUILTIN_HADDPS256,
23747 IX86_BUILTIN_HSUBPD256,
23748 IX86_BUILTIN_HSUBPS256,
23749 IX86_BUILTIN_MAXPD256,
23750 IX86_BUILTIN_MAXPS256,
23751 IX86_BUILTIN_MINPD256,
23752 IX86_BUILTIN_MINPS256,
23753 IX86_BUILTIN_MULPD256,
23754 IX86_BUILTIN_MULPS256,
23755 IX86_BUILTIN_ORPD256,
23756 IX86_BUILTIN_ORPS256,
23757 IX86_BUILTIN_SHUFPD256,
23758 IX86_BUILTIN_SHUFPS256,
23759 IX86_BUILTIN_SUBPD256,
23760 IX86_BUILTIN_SUBPS256,
23761 IX86_BUILTIN_XORPD256,
23762 IX86_BUILTIN_XORPS256,
23763 IX86_BUILTIN_CMPSD,
23764 IX86_BUILTIN_CMPSS,
23765 IX86_BUILTIN_CMPPD,
23766 IX86_BUILTIN_CMPPS,
23767 IX86_BUILTIN_CMPPD256,
23768 IX86_BUILTIN_CMPPS256,
23769 IX86_BUILTIN_CVTDQ2PD256,
23770 IX86_BUILTIN_CVTDQ2PS256,
23771 IX86_BUILTIN_CVTPD2PS256,
23772 IX86_BUILTIN_CVTPS2DQ256,
23773 IX86_BUILTIN_CVTPS2PD256,
23774 IX86_BUILTIN_CVTTPD2DQ256,
23775 IX86_BUILTIN_CVTPD2DQ256,
23776 IX86_BUILTIN_CVTTPS2DQ256,
23777 IX86_BUILTIN_EXTRACTF128PD256,
23778 IX86_BUILTIN_EXTRACTF128PS256,
23779 IX86_BUILTIN_EXTRACTF128SI256,
23780 IX86_BUILTIN_VZEROALL,
23781 IX86_BUILTIN_VZEROUPPER,
23782 IX86_BUILTIN_VPERMILVARPD,
23783 IX86_BUILTIN_VPERMILVARPS,
23784 IX86_BUILTIN_VPERMILVARPD256,
23785 IX86_BUILTIN_VPERMILVARPS256,
23786 IX86_BUILTIN_VPERMILPD,
23787 IX86_BUILTIN_VPERMILPS,
23788 IX86_BUILTIN_VPERMILPD256,
23789 IX86_BUILTIN_VPERMILPS256,
23790 IX86_BUILTIN_VPERMIL2PD,
23791 IX86_BUILTIN_VPERMIL2PS,
23792 IX86_BUILTIN_VPERMIL2PD256,
23793 IX86_BUILTIN_VPERMIL2PS256,
23794 IX86_BUILTIN_VPERM2F128PD256,
23795 IX86_BUILTIN_VPERM2F128PS256,
23796 IX86_BUILTIN_VPERM2F128SI256,
23797 IX86_BUILTIN_VBROADCASTSS,
23798 IX86_BUILTIN_VBROADCASTSD256,
23799 IX86_BUILTIN_VBROADCASTSS256,
23800 IX86_BUILTIN_VBROADCASTPD256,
23801 IX86_BUILTIN_VBROADCASTPS256,
23802 IX86_BUILTIN_VINSERTF128PD256,
23803 IX86_BUILTIN_VINSERTF128PS256,
23804 IX86_BUILTIN_VINSERTF128SI256,
23805 IX86_BUILTIN_LOADUPD256,
23806 IX86_BUILTIN_LOADUPS256,
23807 IX86_BUILTIN_STOREUPD256,
23808 IX86_BUILTIN_STOREUPS256,
23809 IX86_BUILTIN_LDDQU256,
23810 IX86_BUILTIN_MOVNTDQ256,
23811 IX86_BUILTIN_MOVNTPD256,
23812 IX86_BUILTIN_MOVNTPS256,
23813 IX86_BUILTIN_LOADDQU256,
23814 IX86_BUILTIN_STOREDQU256,
23815 IX86_BUILTIN_MASKLOADPD,
23816 IX86_BUILTIN_MASKLOADPS,
23817 IX86_BUILTIN_MASKSTOREPD,
23818 IX86_BUILTIN_MASKSTOREPS,
23819 IX86_BUILTIN_MASKLOADPD256,
23820 IX86_BUILTIN_MASKLOADPS256,
23821 IX86_BUILTIN_MASKSTOREPD256,
23822 IX86_BUILTIN_MASKSTOREPS256,
23823 IX86_BUILTIN_MOVSHDUP256,
23824 IX86_BUILTIN_MOVSLDUP256,
23825 IX86_BUILTIN_MOVDDUP256,
23827 IX86_BUILTIN_SQRTPD256,
23828 IX86_BUILTIN_SQRTPS256,
23829 IX86_BUILTIN_SQRTPS_NR256,
23830 IX86_BUILTIN_RSQRTPS256,
23831 IX86_BUILTIN_RSQRTPS_NR256,
23833 IX86_BUILTIN_RCPPS256,
23835 IX86_BUILTIN_ROUNDPD256,
23836 IX86_BUILTIN_ROUNDPS256,
23838 IX86_BUILTIN_FLOORPD256,
23839 IX86_BUILTIN_CEILPD256,
23840 IX86_BUILTIN_TRUNCPD256,
23841 IX86_BUILTIN_RINTPD256,
23842 IX86_BUILTIN_ROUNDPD_AZ256,
23843 IX86_BUILTIN_FLOORPS256,
23844 IX86_BUILTIN_CEILPS256,
23845 IX86_BUILTIN_TRUNCPS256,
23846 IX86_BUILTIN_RINTPS256,
23847 IX86_BUILTIN_ROUNDPS_AZ256,
23849 IX86_BUILTIN_UNPCKHPD256,
23850 IX86_BUILTIN_UNPCKLPD256,
23851 IX86_BUILTIN_UNPCKHPS256,
23852 IX86_BUILTIN_UNPCKLPS256,
23854 IX86_BUILTIN_SI256_SI,
23855 IX86_BUILTIN_PS256_PS,
23856 IX86_BUILTIN_PD256_PD,
23857 IX86_BUILTIN_SI_SI256,
23858 IX86_BUILTIN_PS_PS256,
23859 IX86_BUILTIN_PD_PD256,
23861 IX86_BUILTIN_VTESTZPD,
23862 IX86_BUILTIN_VTESTCPD,
23863 IX86_BUILTIN_VTESTNZCPD,
23864 IX86_BUILTIN_VTESTZPS,
23865 IX86_BUILTIN_VTESTCPS,
23866 IX86_BUILTIN_VTESTNZCPS,
23867 IX86_BUILTIN_VTESTZPD256,
23868 IX86_BUILTIN_VTESTCPD256,
23869 IX86_BUILTIN_VTESTNZCPD256,
23870 IX86_BUILTIN_VTESTZPS256,
23871 IX86_BUILTIN_VTESTCPS256,
23872 IX86_BUILTIN_VTESTNZCPS256,
23873 IX86_BUILTIN_PTESTZ256,
23874 IX86_BUILTIN_PTESTC256,
23875 IX86_BUILTIN_PTESTNZC256,
23877 IX86_BUILTIN_MOVMSKPD256,
23878 IX86_BUILTIN_MOVMSKPS256,
23881 IX86_BUILTIN_MPSADBW256,
23882 IX86_BUILTIN_PABSB256,
23883 IX86_BUILTIN_PABSW256,
23884 IX86_BUILTIN_PABSD256,
23885 IX86_BUILTIN_PACKSSDW256,
23886 IX86_BUILTIN_PACKSSWB256,
23887 IX86_BUILTIN_PACKUSDW256,
23888 IX86_BUILTIN_PACKUSWB256,
23889 IX86_BUILTIN_PADDB256,
23890 IX86_BUILTIN_PADDW256,
23891 IX86_BUILTIN_PADDD256,
23892 IX86_BUILTIN_PADDQ256,
23893 IX86_BUILTIN_PADDSB256,
23894 IX86_BUILTIN_PADDSW256,
23895 IX86_BUILTIN_PADDUSB256,
23896 IX86_BUILTIN_PADDUSW256,
23897 IX86_BUILTIN_PALIGNR256,
23898 IX86_BUILTIN_AND256I,
23899 IX86_BUILTIN_ANDNOT256I,
23900 IX86_BUILTIN_PAVGB256,
23901 IX86_BUILTIN_PAVGW256,
23902 IX86_BUILTIN_PBLENDVB256,
23903 IX86_BUILTIN_PBLENDVW256,
23904 IX86_BUILTIN_PCMPEQB256,
23905 IX86_BUILTIN_PCMPEQW256,
23906 IX86_BUILTIN_PCMPEQD256,
23907 IX86_BUILTIN_PCMPEQQ256,
23908 IX86_BUILTIN_PCMPGTB256,
23909 IX86_BUILTIN_PCMPGTW256,
23910 IX86_BUILTIN_PCMPGTD256,
23911 IX86_BUILTIN_PCMPGTQ256,
23912 IX86_BUILTIN_PHADDW256,
23913 IX86_BUILTIN_PHADDD256,
23914 IX86_BUILTIN_PHADDSW256,
23915 IX86_BUILTIN_PHSUBW256,
23916 IX86_BUILTIN_PHSUBD256,
23917 IX86_BUILTIN_PHSUBSW256,
23918 IX86_BUILTIN_PMADDUBSW256,
23919 IX86_BUILTIN_PMADDWD256,
23920 IX86_BUILTIN_PMAXSB256,
23921 IX86_BUILTIN_PMAXSW256,
23922 IX86_BUILTIN_PMAXSD256,
23923 IX86_BUILTIN_PMAXUB256,
23924 IX86_BUILTIN_PMAXUW256,
23925 IX86_BUILTIN_PMAXUD256,
23926 IX86_BUILTIN_PMINSB256,
23927 IX86_BUILTIN_PMINSW256,
23928 IX86_BUILTIN_PMINSD256,
23929 IX86_BUILTIN_PMINUB256,
23930 IX86_BUILTIN_PMINUW256,
23931 IX86_BUILTIN_PMINUD256,
23932 IX86_BUILTIN_PMOVMSKB256,
23933 IX86_BUILTIN_PMOVSXBW256,
23934 IX86_BUILTIN_PMOVSXBD256,
23935 IX86_BUILTIN_PMOVSXBQ256,
23936 IX86_BUILTIN_PMOVSXWD256,
23937 IX86_BUILTIN_PMOVSXWQ256,
23938 IX86_BUILTIN_PMOVSXDQ256,
23939 IX86_BUILTIN_PMOVZXBW256,
23940 IX86_BUILTIN_PMOVZXBD256,
23941 IX86_BUILTIN_PMOVZXBQ256,
23942 IX86_BUILTIN_PMOVZXWD256,
23943 IX86_BUILTIN_PMOVZXWQ256,
23944 IX86_BUILTIN_PMOVZXDQ256,
23945 IX86_BUILTIN_PMULDQ256,
23946 IX86_BUILTIN_PMULHRSW256,
23947 IX86_BUILTIN_PMULHUW256,
23948 IX86_BUILTIN_PMULHW256,
23949 IX86_BUILTIN_PMULLW256,
23950 IX86_BUILTIN_PMULLD256,
23951 IX86_BUILTIN_PMULUDQ256,
23952 IX86_BUILTIN_POR256,
23953 IX86_BUILTIN_PSADBW256,
23954 IX86_BUILTIN_PSHUFB256,
23955 IX86_BUILTIN_PSHUFD256,
23956 IX86_BUILTIN_PSHUFHW256,
23957 IX86_BUILTIN_PSHUFLW256,
23958 IX86_BUILTIN_PSIGNB256,
23959 IX86_BUILTIN_PSIGNW256,
23960 IX86_BUILTIN_PSIGND256,
23961 IX86_BUILTIN_PSLLDQI256,
23962 IX86_BUILTIN_PSLLWI256,
23963 IX86_BUILTIN_PSLLW256,
23964 IX86_BUILTIN_PSLLDI256,
23965 IX86_BUILTIN_PSLLD256,
23966 IX86_BUILTIN_PSLLQI256,
23967 IX86_BUILTIN_PSLLQ256,
23968 IX86_BUILTIN_PSRAWI256,
23969 IX86_BUILTIN_PSRAW256,
23970 IX86_BUILTIN_PSRADI256,
23971 IX86_BUILTIN_PSRAD256,
23972 IX86_BUILTIN_PSRLDQI256,
23973 IX86_BUILTIN_PSRLWI256,
23974 IX86_BUILTIN_PSRLW256,
23975 IX86_BUILTIN_PSRLDI256,
23976 IX86_BUILTIN_PSRLD256,
23977 IX86_BUILTIN_PSRLQI256,
23978 IX86_BUILTIN_PSRLQ256,
23979 IX86_BUILTIN_PSUBB256,
23980 IX86_BUILTIN_PSUBW256,
23981 IX86_BUILTIN_PSUBD256,
23982 IX86_BUILTIN_PSUBQ256,
23983 IX86_BUILTIN_PSUBSB256,
23984 IX86_BUILTIN_PSUBSW256,
23985 IX86_BUILTIN_PSUBUSB256,
23986 IX86_BUILTIN_PSUBUSW256,
23987 IX86_BUILTIN_PUNPCKHBW256,
23988 IX86_BUILTIN_PUNPCKHWD256,
23989 IX86_BUILTIN_PUNPCKHDQ256,
23990 IX86_BUILTIN_PUNPCKHQDQ256,
23991 IX86_BUILTIN_PUNPCKLBW256,
23992 IX86_BUILTIN_PUNPCKLWD256,
23993 IX86_BUILTIN_PUNPCKLDQ256,
23994 IX86_BUILTIN_PUNPCKLQDQ256,
23995 IX86_BUILTIN_PXOR256,
23996 IX86_BUILTIN_MOVNTDQA256,
23997 IX86_BUILTIN_VBROADCASTSS_PS,
23998 IX86_BUILTIN_VBROADCASTSS_PS256,
23999 IX86_BUILTIN_VBROADCASTSD_PD256,
24000 IX86_BUILTIN_VBROADCASTSI256,
24001 IX86_BUILTIN_PBLENDD256,
24002 IX86_BUILTIN_PBLENDD128,
24003 IX86_BUILTIN_PBROADCASTB256,
24004 IX86_BUILTIN_PBROADCASTW256,
24005 IX86_BUILTIN_PBROADCASTD256,
24006 IX86_BUILTIN_PBROADCASTQ256,
24007 IX86_BUILTIN_PBROADCASTB128,
24008 IX86_BUILTIN_PBROADCASTW128,
24009 IX86_BUILTIN_PBROADCASTD128,
24010 IX86_BUILTIN_PBROADCASTQ128,
24011 IX86_BUILTIN_VPERMVARSI256,
24012 IX86_BUILTIN_VPERMDF256,
24013 IX86_BUILTIN_VPERMVARSF256,
24014 IX86_BUILTIN_VPERMDI256,
24015 IX86_BUILTIN_VPERMTI256,
  IX86_BUILTIN_VEXTRACT128I256,
  IX86_BUILTIN_VINSERT128I256,
  IX86_BUILTIN_MASKLOADD,
  IX86_BUILTIN_MASKLOADQ,
  IX86_BUILTIN_MASKLOADD256,
  IX86_BUILTIN_MASKLOADQ256,
  IX86_BUILTIN_MASKSTORED,
  IX86_BUILTIN_MASKSTOREQ,
  IX86_BUILTIN_MASKSTORED256,
  IX86_BUILTIN_MASKSTOREQ256,
  IX86_BUILTIN_PSLLVV4DI,
  IX86_BUILTIN_PSLLVV2DI,
  IX86_BUILTIN_PSLLVV8SI,
  IX86_BUILTIN_PSLLVV4SI,
  IX86_BUILTIN_PSRAVV8SI,
  IX86_BUILTIN_PSRAVV4SI,
  IX86_BUILTIN_PSRLVV4DI,
  IX86_BUILTIN_PSRLVV2DI,
  IX86_BUILTIN_PSRLVV8SI,
  IX86_BUILTIN_PSRLVV4SI,

  IX86_BUILTIN_GATHERSIV2DF,
  IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF,
  IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF,
  IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF,
  IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI,
  IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI,
  IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI,
  IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI,
  IX86_BUILTIN_GATHERDIV8SI,

  /* TFmode support builtins.  */
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  IX86_BUILTIN_CVTUDQ2PS,

  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* BMI2 instructions.  */
  IX86_BUILTIN_BZHI32,
  IX86_BUILTIN_BZHI64,
  IX86_BUILTIN_PDEP32,
  IX86_BUILTIN_PDEP64,
  IX86_BUILTIN_PEXT32,
  IX86_BUILTIN_PEXT64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;		     /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  HOST_WIDE_INT isa;		     /* isa_flags this builtin is defined for */
  bool const_p;			     /* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA, in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
	     enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
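
/* For example (an illustrative sketch only; "__builtin_ia32_foo" and
   IX86_BUILTIN_FOO are hypothetical names, not entries defined above):

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_foo",
		  V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_FOO);

   creates the function decl immediately when SSE2 is enabled in
   ix86_isa_flags (or when the front end registers builtins eagerly), and
   otherwise only records NAME and TCODE in ix86_builtins_isa so that
   ix86_add_new_builtins below can create the decl later.  */
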
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
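
/* For example (again with a hypothetical builtin), a side-effect-free
   arithmetic builtin would normally be registered as

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_bar",
			V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_BAR);

   TREE_READONLY on the decl corresponds to __attribute__((const)), so the
   middle end may CSE or delete duplicate calls; the const_p flag records
   the same property for decls whose creation was deferred.  */
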
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
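
/* A deferred builtin typically becomes usable once function-specific
   options enable its ISA, e.g. (user-level sketch):

     __attribute__((target ("avx")))
     __m256d use_avx (__m256d x, __m256d y)
     {
       return __builtin_ia32_addpd256 (x, y);
     }

   Processing the target attribute is expected to recompute isa_flags and
   call ix86_add_new_builtins with the newly enabled bits.  */
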
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1
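
/* For example, a "greater than" comparison builtin can be supported with
   only a native "less than" pattern by setting this flag; the expander is
   then expected to swap the two input operands before generating the insn,
   roughly (a sketch of the check, not a verbatim quote of the expander):

     if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
       {
	 rtx tmp = op0;
	 op0 = op1;
	 op1 = tmp;
       }
*/
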
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};

24585 /* Builtins with variable number of arguments. */
24586 static const struct builtin_description bdesc_args[] =
24588 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
24589 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
24590 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
24591 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24592 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24593 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24594 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24597 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24598 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24599 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24600 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24601 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24602 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24604 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24605 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24606 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24607 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24608 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24609 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24610 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24611 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24613 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24614 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24616 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24617 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24618 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24619 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24621 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24622 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24623 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24624 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24625 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24626 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24628 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24629 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24630 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24631 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24632 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
24633 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
24635 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24636 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
24637 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24639 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
24641 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24642 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24643 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24644 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24645 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24646 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24648 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24649 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24650 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24651 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24652 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24653 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24655 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24656 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24657 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24658 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24661 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24662 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24663 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24664 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24666 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24667 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24668 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24669 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24670 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24671 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24672 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24673 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24674 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24675 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24676 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24677 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24678 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24679 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24680 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24683 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24684 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24685 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24686 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24687 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24688 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24691 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
24692 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24693 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24694 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24695 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24696 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24697 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24698 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24699 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24700 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24701 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24702 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24704 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24706 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24707 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24708 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24709 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24710 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24711 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24712 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24713 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24715 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24716 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24717 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24718 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24719 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24720 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24721 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24722 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24723 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24724 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24725 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
24726 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24727 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24728 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24729 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24730 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24731 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24732 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24733 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24734 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24735 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24736 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24738 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24739 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24740 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24741 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24743 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24744 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24745 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24746 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24748 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24750 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24751 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24752 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24753 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24754 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24756 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
24757 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
24758 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
24760 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
24762 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24763 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24764 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24766 /* SSE MMX or 3Dnow!A */
24767 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24768 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24769 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24771 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24772 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24773 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24774 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24776 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
24777 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
24779 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
24782 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24784 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
24785 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
24786 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
24787 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
24788 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
24789 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24790 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
24791 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
24792 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
24793 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
24794 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
24795 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
24797 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
24798 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
24799 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
24800 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
24801 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24802 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24804 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24805 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24806 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
24807 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24808 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24810 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
24812 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24813 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24814 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24815 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24817 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24818 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
24819 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24821 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24822 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24823 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24824 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24825 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24826 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24827 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24828 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24830 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24831 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24832 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24833 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24834 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
24835 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24836 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24837 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24838 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24839 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24840 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24841 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24842 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24843 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24844 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24845 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24846 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24847 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24848 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24849 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
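
  /* Shifts.  The *_COUNT-suffixed types mark the last operand as a
     shift count, distinguishing the immediate (SI_COUNT) and
     vector-register (e.g. V8HI_COUNT) forms; the *_INT_CONVERT types
     flag an operand-mode conversion, e.g. pslldqi128/psrldqi128 are
     routed through the V1TI whole-register shift patterns.  */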
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
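
  /* SSE2 MMX */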
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
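
  /* SSE3 */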
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
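
  /* SSSE3 */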
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
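
  /* SSE4.1 */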
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
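
  /* SSE4.1 rounding and ptest.  OPTION_MASK_ISA_ROUND groups these;
     the floor/ceil/trunc/rint variants reuse the roundpd/roundps
     patterns and carry the rounding-mode immediate (ROUND_FLOOR,
     ROUND_CEIL, ...) through the comparison-code slot of the
     descriptor.  */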
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
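
  /* SSE4.2 */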
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
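
  /* SSE4A */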
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
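
  /* AES */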
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
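
  /* PCLMUL */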
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
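
  /* AVX */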
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
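
  /* AVX2 */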
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv4di, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlqv4di3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrqv4di3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25346 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25347 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25348 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25349 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25350 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25351 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25352 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25353 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25354 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25355 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25356 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
25357 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
25358 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
25359 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
25360 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
25361 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
25362 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
25363 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
25364 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
25365 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
25366 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
25367 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
25368 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
25369 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25370 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
25371 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25372 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
25373 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
25374 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
25375 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
25376 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25377 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25378 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25379 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25380 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25381 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25382 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25383 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25384 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25385 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
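/* Illustrative sketch, not part of the table above: the psllv/psrav/
   psrlv rows bind per-lane variable shifts, where each element of the
   first operand is shifted by the count held in the matching element
   of the second operand.  Assuming -mavx2:

     typedef int __v4si __attribute__ ((__vector_size__ (16)));

     __v4si
     shift_each_lane (__v4si vals, __v4si counts)
     {
       // VPSLLVD: vals[i] << counts[i] independently in all four lanes.
       return __builtin_ia32_psllv4si (vals, counts);
     }
*/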
25387 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
25390 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25391 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25392 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
25395 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25396 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25399 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
25400 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
25401 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
25402 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
25405 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25406 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25407 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25408 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25409 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25410 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
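/* Illustrative sketch, not part of the table above: the BMI2 rows
   bind plain scalar builtins.  Assuming -mbmi2, PDEP deposits the
   low-order bits of its first operand into the mask positions given
   by its second:

     unsigned int
     spread_to_even_bits (unsigned int bits)
     {
       // Bit i of BITS lands in the i-th set position of the mask.
       return __builtin_ia32_pdep_si (bits, 0x55555555u);
     }
*/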
25413 /* FMA4 and XOP. */
25414 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
25415 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
25416 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
25417 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
25418 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
25419 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
25420 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
25421 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
25422 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
25423 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
25424 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
25425 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
25426 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
25427 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
25428 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
25429 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
25430 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
25431 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
25432 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
25433 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
25434 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
25435 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
25436 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
25437 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
25438 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
25439 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
25440 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
25441 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
25442 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
25443 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
25444 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
25445 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
25446 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
25447 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
25448 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
25449 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
25450 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
25451 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
25452 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
25453 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
25454 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
25455 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
25456 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
25457 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
25458 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
25459 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
25460 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
25461 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
25462 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
25463 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
25464 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
25465 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
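/* Reading aid (illustrative): each MULTI_ARG_* alias above names an
   ix86_builtin_func_type, i.e. the C-level signature a table row
   hands to its builtin.  MULTI_ARG_3_SF, for instance, denotes:

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     // Shape of a MULTI_ARG_3_SF builtin such as
     // __builtin_ia32_vfmaddps (FMA4 a * b + c per element).
     __v4sf builtin_3_sf (__v4sf a, __v4sf b, __v4sf c);
*/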
25467 static const struct builtin_description bdesc_multi_arg[] =
25469 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
25470 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
25471 UNKNOWN, (int)MULTI_ARG_3_SF },
25472 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
25473 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
25474 UNKNOWN, (int)MULTI_ARG_3_DF },
25476 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
25477 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
25478 UNKNOWN, (int)MULTI_ARG_3_SF },
25479 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
25480 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
25481 UNKNOWN, (int)MULTI_ARG_3_DF },
25482 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
25483 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
25484 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25485 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
25486 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
25487 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25489 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
25490 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
25491 UNKNOWN, (int)MULTI_ARG_3_SF },
25492 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
25493 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
25494 UNKNOWN, (int)MULTI_ARG_3_DF },
25495 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
25496 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
25497 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25498 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
25499 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
25500 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
25503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
25504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
25505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
25506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
25507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
25508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
25510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
25513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
25514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
25515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
25516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
25518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
25520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
25535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
25536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
25537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
25538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
25539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
25540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
25541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
25543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
25544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
25545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
25547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
25548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
25550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
25551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
25552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
25553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
25554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
25555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
25557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
25559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
25560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
25562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
25565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
25566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
25568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
25574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
25577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
25578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
25579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
25581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
25582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
25585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
25586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
25587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
25589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
25590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
25593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
25594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
25595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
25597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
25601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
25602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
25603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
25605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
25606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
25609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
25610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
25611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
25613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
25614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
25617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
25618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
25619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
25621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
25622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
25625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
25626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
25627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
25629 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25630 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25631 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25632 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
25633 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
25634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
25635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
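/* Illustrative sketch: the vpcom rows reuse a single maskcmp icode
   per mode and vary only the rtx_code column (EQ, LT, GTU, ...),
   which ix86_expand_multi_arg_builtin reinstates as the comparison
   operator.  Assuming -mxop:

     typedef char __v16qi __attribute__ ((__vector_size__ (16)));

     __v16qi
     bytes_less_than (__v16qi a, __v16qi b)
     {
       // All-ones in each byte lane where a < b (signed), else zero.
       return __builtin_ia32_vpcomltb (a, b);
     }
*/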
25637 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25638 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25639 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25640 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25641 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25642 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25643 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25644 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25646 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25647 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25648 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25652 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25655 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
25656 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
25657 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25658 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
25662 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
25663 in the current target ISA to allow the user to compile particular modules
25664 with different target specific options that differ from the command line options. */
25667 ix86_init_mmx_sse_builtins (void)
25669 const struct builtin_description *d;
25670 enum ix86_builtin_func_type ftype;
25673 /* Add all special builtins with variable number of operands. */
25674 for (i = 0, d = bdesc_special_args;
25675 i < ARRAY_SIZE (bdesc_special_args);
25681 ftype = (enum ix86_builtin_func_type) d->flag;
25682 def_builtin (d->mask, d->name, ftype, d->code);
25685 /* Add all builtins with variable number of operands. */
25686 for (i = 0, d = bdesc_args;
25687 i < ARRAY_SIZE (bdesc_args);
25693 ftype = (enum ix86_builtin_func_type) d->flag;
25694 def_builtin_const (d->mask, d->name, ftype, d->code);
25697 /* pcmpestr[im] insns. */
25698 for (i = 0, d = bdesc_pcmpestr;
25699 i < ARRAY_SIZE (bdesc_pcmpestr);
25702 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25703 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25705 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25706 def_builtin_const (d->mask, d->name, ftype, d->code);
25709 /* pcmpistr[im] insns. */
25710 for (i = 0, d = bdesc_pcmpistr;
25711 i < ARRAY_SIZE (bdesc_pcmpistr);
25714 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25715 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25717 ftype = INT_FTYPE_V16QI_V16QI_INT;
25718 def_builtin_const (d->mask, d->name, ftype, d->code);
25721 /* comi/ucomi insns. */
25722 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25724 if (d->mask == OPTION_MASK_ISA_SSE2)
25725 ftype = INT_FTYPE_V2DF_V2DF;
25727 ftype = INT_FTYPE_V4SF_V4SF;
25728 def_builtin_const (d->mask, d->name, ftype, d->code);
25732 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25733 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25734 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25735 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
25737 /* SSE or 3DNow!A */
25738 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25739 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25740 IX86_BUILTIN_MASKMOVQ);
25743 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25744 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25746 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25747 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25748 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25749 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25752 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25753 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25754 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25755 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
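/* Illustrative sketch, assuming -msse3 and a context where MONITOR/
   MWAIT may execute (they typically require ring 0): the pair arms
   an address range and then idles until it is written.

     static void
     wait_for_store (const volatile void *addr)
     {
       // Arm the monitor on ADDR's cache line; both the extension
       // and hint words are zero here.
       __builtin_ia32_monitor ((const void *) addr, 0, 0);
       __builtin_ia32_mwait (0, 0);
     }
*/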
25758 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25759 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25760 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25761 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25762 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25763 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25764 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25765 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25766 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25767 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25768 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25769 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
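/* Illustrative sketch, assuming -maes: one encryption round applied
   to a 128-bit state, the builtin behind _mm_aesenc_si128.

     typedef long long __v2di __attribute__ ((__vector_size__ (16)));

     __v2di
     aes_round (__v2di state, __v2di round_key)
     {
       // ShiftRows, SubBytes, MixColumns, then XOR with ROUND_KEY.
       return __builtin_ia32_aesenc128 (state, round_key);
     }
*/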
25772 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25773 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25776 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25777 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25778 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25779 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25780 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25781 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25782 IX86_BUILTIN_RDRAND64_STEP);
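/* Illustrative sketch, assuming -mrdrnd: the *_step builtins store
   through their pointer argument and return nonzero only when the
   hardware actually delivered entropy, so callers retry.

     static int
     get_random_u32 (unsigned int *out)
     {
       int i;
       // RDRAND can transiently fail; a bounded retry loop is the
       // usual idiom.
       for (i = 0; i < 10; i++)
         if (__builtin_ia32_rdrand32_step (out))
           return 1;
       return 0;
     }
*/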
25785 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
25786 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
25787 IX86_BUILTIN_GATHERSIV2DF);
25789 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
25790 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
25791 IX86_BUILTIN_GATHERSIV4DF);
25793 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
25794 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
25795 IX86_BUILTIN_GATHERDIV2DF);
25797 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
25798 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
25799 IX86_BUILTIN_GATHERDIV4DF);
25801 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
25802 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
25803 IX86_BUILTIN_GATHERSIV4SF);
25805 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
25806 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
25807 IX86_BUILTIN_GATHERSIV8SF);
25809 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
25810 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
25811 IX86_BUILTIN_GATHERDIV4SF);
25813 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
25814 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
25815 IX86_BUILTIN_GATHERDIV8SF);
25817 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
25818 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
25819 IX86_BUILTIN_GATHERSIV2DI);
25821 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
25822 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
25823 IX86_BUILTIN_GATHERSIV4DI);
25825 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
25826 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
25827 IX86_BUILTIN_GATHERDIV2DI);
25829 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
25830 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
25831 IX86_BUILTIN_GATHERDIV4DI);
25833 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
25834 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
25835 IX86_BUILTIN_GATHERSIV4SI);
25837 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
25838 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
25839 IX86_BUILTIN_GATHERSIV8SI);
25841 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
25842 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
25843 IX86_BUILTIN_GATHERDIV4SI);
25845 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
25846 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
25847 IX86_BUILTIN_GATHERDIV8SI);
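/* Illustrative sketch, assuming -mavx2: every gather builtin takes
   (src, base, index, mask, scale).  A lane whose mask sign bit is
   set is loaded from base + index[i] * scale (SCALE must be an
   immediate 1, 2, 4 or 8); the remaining lanes are copied from SRC.
   An all-ones mask therefore gathers unconditionally:

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));
     typedef int __v4si __attribute__ ((__vector_size__ (16)));

     __v4sf
     gather4 (const float *base, __v4si index)
     {
       __v4sf src = { 0, 0, 0, 0 };
       __v4si allset = { -1, -1, -1, -1 };
       // Gather base[index[i]] into every lane; the scale is
       // sizeof (float).
       return __builtin_ia32_gathersiv4sf (src, base, index,
                                           (__v4sf) allset, 4);
     }
*/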
25849 /* MMX access to the vec_init patterns. */
25850 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25851 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25853 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25854 V4HI_FTYPE_HI_HI_HI_HI,
25855 IX86_BUILTIN_VEC_INIT_V4HI);
25857 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25858 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25859 IX86_BUILTIN_VEC_INIT_V8QI);
25861 /* Access to the vec_extract patterns. */
25862 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25863 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25864 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25865 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25866 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25867 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25868 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25869 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25870 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25871 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25873 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25874 "__builtin_ia32_vec_ext_v4hi",
25875 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25877 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25878 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25880 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25881 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
25883 /* Access to the vec_set patterns. */
25884 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25885 "__builtin_ia32_vec_set_v2di",
25886 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25888 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25889 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25891 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25892 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25894 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25895 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25897 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25898 "__builtin_ia32_vec_set_v4hi",
25899 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25901 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25902 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
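/* Illustrative sketch, assuming -msse4.1: the vec_ext/vec_set
   builtins require a constant lane number, which is how the
   extract/insert intrinsics in the *intrin.h headers reach the
   patterns registered above.

     typedef int __v4si __attribute__ ((__vector_size__ (16)));

     __v4si
     bump_lane1 (__v4si v)
     {
       // Read lane 1, then write the incremented value back.
       int old = __builtin_ia32_vec_ext_v4si (v, 1);
       return __builtin_ia32_vec_set_v4si (v, old + 1, 1);
     }
*/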
25904 /* Add FMA4 multi-arg instructions. */
25905 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25910 ftype = (enum ix86_builtin_func_type) d->flag;
25911 def_builtin_const (d->mask, d->name, ftype, d->code);
25915 /* Internal method for ix86_init_builtins. */
25918 ix86_init_builtins_va_builtins_abi (void)
25920 tree ms_va_ref, sysv_va_ref;
25921 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25922 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25923 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25924 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25928 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25929 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25930 ms_va_ref = build_reference_type (ms_va_list_type_node);
25932 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25935 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25936 fnvoid_va_start_ms =
25937 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25938 fnvoid_va_end_sysv =
25939 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25940 fnvoid_va_start_sysv =
25941 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25943 fnvoid_va_copy_ms =
25944 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25946 fnvoid_va_copy_sysv =
25947 build_function_type_list (void_type_node, sysv_va_ref,
25948 sysv_va_ref, NULL_TREE);
25950 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25951 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25952 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25953 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25954 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25955 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25956 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25957 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25958 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25959 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25960 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25961 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
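/* Illustrative sketch, assuming x86-64: the ms_abi attribute lists
   built above pair with __builtin_ms_va_* so that an ms_abi function
   can walk its own arguments with the Microsoft layout even when the
   default ABI is System V.

     int __attribute__ ((ms_abi))
     sum_ints (int count, ...)
     {
       __builtin_ms_va_list ap;
       int i, total = 0;
       __builtin_ms_va_start (ap, count);
       for (i = 0; i < count; i++)
         total += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return total;
     }
*/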
25965 ix86_init_builtin_types (void)
25967 tree float128_type_node, float80_type_node;
25969 /* The __float80 type. */
25970 float80_type_node = long_double_type_node;
25971 if (TYPE_MODE (float80_type_node) != XFmode)
25973 /* long double is not XFmode here, so build a distinct 80-bit type. */
25974 float80_type_node = make_node (REAL_TYPE);
25976 TYPE_PRECISION (float80_type_node) = 80;
25977 layout_type (float80_type_node);
25979 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25981 /* The __float128 type. */
25982 float128_type_node = make_node (REAL_TYPE);
25983 TYPE_PRECISION (float128_type_node) = 128;
25984 layout_type (float128_type_node);
25985 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25987 /* This macro is built by i386-builtin-types.awk. */
25988 DEFINE_BUILTIN_PRIMITIVE_TYPES;
25992 ix86_init_builtins (void)
25996 ix86_init_builtin_types ();
25998 /* TFmode support builtins. */
25999 def_builtin_const (0, "__builtin_infq",
26000 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
26001 def_builtin_const (0, "__builtin_huge_valq",
26002 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
26004 /* We will expand them to a normal call if SSE2 isn't available, since
26005 they are used by libgcc. */
26006 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
26007 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
26008 BUILT_IN_MD, "__fabstf2", NULL_TREE);
26009 TREE_READONLY (t) = 1;
26010 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
26012 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
26013 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
26014 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
26015 TREE_READONLY (t) = 1;
26016 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
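/* Illustrative sketch: the TFmode builtins registered above back the
   __float128 support, with __builtin_fabsq/__builtin_copysignq
   expanding to libgcc's __fabstf2/__copysigntf3 when SSE2 is absent.

     __float128
     same_sign_inf (__float128 x)
     {
       // An infinity carrying x's sign bit.
       return __builtin_copysignq (__builtin_infq (), x);
     }
*/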
26018 ix86_init_mmx_sse_builtins ();
26021 ix86_init_builtins_va_builtins_abi ();
26023 #ifdef SUBTARGET_INIT_BUILTINS
26024 SUBTARGET_INIT_BUILTINS;
26028 /* Return the ix86 builtin for CODE. */
26031 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
26033 if (code >= IX86_BUILTIN_MAX)
26034 return error_mark_node;
26036 return ix86_builtins[code];
26039 /* Errors in the source file can cause expand_expr to return const0_rtx
26040 where we expect a vector. To avoid crashing, use one of the vector
26041 clear instructions. */
26043 safe_vector_operand (rtx x, enum machine_mode mode)
26045 if (x == const0_rtx)
26046 x = CONST0_RTX (mode);
26050 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
26053 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
26056 tree arg0 = CALL_EXPR_ARG (exp, 0);
26057 tree arg1 = CALL_EXPR_ARG (exp, 1);
26058 rtx op0 = expand_normal (arg0);
26059 rtx op1 = expand_normal (arg1);
26060 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26061 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26062 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
26064 if (VECTOR_MODE_P (mode0))
26065 op0 = safe_vector_operand (op0, mode0);
26066 if (VECTOR_MODE_P (mode1))
26067 op1 = safe_vector_operand (op1, mode1);
26069 if (optimize || !target
26070 || GET_MODE (target) != tmode
26071 || !insn_data[icode].operand[0].predicate (target, tmode))
26072 target = gen_reg_rtx (tmode);
26074 if (GET_MODE (op1) == SImode && mode1 == TImode)
26076 rtx x = gen_reg_rtx (V4SImode);
26077 emit_insn (gen_sse2_loadd (x, op1));
26078 op1 = gen_lowpart (TImode, x);
26081 if (!insn_data[icode].operand[1].predicate (op0, mode0))
26082 op0 = copy_to_mode_reg (mode0, op0);
26083 if (!insn_data[icode].operand[2].predicate (op1, mode1))
26084 op1 = copy_to_mode_reg (mode1, op1);
26086 pat = GEN_FCN (icode) (target, op0, op1);
26095 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
26098 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
26099 enum ix86_builtin_func_type m_type,
26100 enum rtx_code sub_code)
26105 bool comparison_p = false;
26107 bool last_arg_constant = false;
26108 int num_memory = 0;
26111 enum machine_mode mode;
26114 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26118 case MULTI_ARG_4_DF2_DI_I:
26119 case MULTI_ARG_4_DF2_DI_I1:
26120 case MULTI_ARG_4_SF2_SI_I:
26121 case MULTI_ARG_4_SF2_SI_I1:
26123 last_arg_constant = true;
26126 case MULTI_ARG_3_SF:
26127 case MULTI_ARG_3_DF:
26128 case MULTI_ARG_3_SF2:
26129 case MULTI_ARG_3_DF2:
26130 case MULTI_ARG_3_DI:
26131 case MULTI_ARG_3_SI:
26132 case MULTI_ARG_3_SI_DI:
26133 case MULTI_ARG_3_HI:
26134 case MULTI_ARG_3_HI_SI:
26135 case MULTI_ARG_3_QI:
26136 case MULTI_ARG_3_DI2:
26137 case MULTI_ARG_3_SI2:
26138 case MULTI_ARG_3_HI2:
26139 case MULTI_ARG_3_QI2:
26143 case MULTI_ARG_2_SF:
26144 case MULTI_ARG_2_DF:
26145 case MULTI_ARG_2_DI:
26146 case MULTI_ARG_2_SI:
26147 case MULTI_ARG_2_HI:
26148 case MULTI_ARG_2_QI:
26152 case MULTI_ARG_2_DI_IMM:
26153 case MULTI_ARG_2_SI_IMM:
26154 case MULTI_ARG_2_HI_IMM:
26155 case MULTI_ARG_2_QI_IMM:
26157 last_arg_constant = true;
26160 case MULTI_ARG_1_SF:
26161 case MULTI_ARG_1_DF:
26162 case MULTI_ARG_1_SF2:
26163 case MULTI_ARG_1_DF2:
26164 case MULTI_ARG_1_DI:
26165 case MULTI_ARG_1_SI:
26166 case MULTI_ARG_1_HI:
26167 case MULTI_ARG_1_QI:
26168 case MULTI_ARG_1_SI_DI:
26169 case MULTI_ARG_1_HI_DI:
26170 case MULTI_ARG_1_HI_SI:
26171 case MULTI_ARG_1_QI_DI:
26172 case MULTI_ARG_1_QI_SI:
26173 case MULTI_ARG_1_QI_HI:
26177 case MULTI_ARG_2_DI_CMP:
26178 case MULTI_ARG_2_SI_CMP:
26179 case MULTI_ARG_2_HI_CMP:
26180 case MULTI_ARG_2_QI_CMP:
26182 comparison_p = true;
26185 case MULTI_ARG_2_SF_TF:
26186 case MULTI_ARG_2_DF_TF:
26187 case MULTI_ARG_2_DI_TF:
26188 case MULTI_ARG_2_SI_TF:
26189 case MULTI_ARG_2_HI_TF:
26190 case MULTI_ARG_2_QI_TF:
26196 gcc_unreachable ();
26199 if (optimize || !target
26200 || GET_MODE (target) != tmode
26201 || !insn_data[icode].operand[0].predicate (target, tmode))
26202 target = gen_reg_rtx (tmode);
26204 gcc_assert (nargs <= 4);
26206 for (i = 0; i < nargs; i++)
26208 tree arg = CALL_EXPR_ARG (exp, i);
26209 rtx op = expand_normal (arg);
26210 int adjust = (comparison_p) ? 1 : 0;
26211 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
26213 if (last_arg_constant && i == nargs - 1)
26215 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
26217 enum insn_code new_icode = icode;
26220 case CODE_FOR_xop_vpermil2v2df3:
26221 case CODE_FOR_xop_vpermil2v4sf3:
26222 case CODE_FOR_xop_vpermil2v4df3:
26223 case CODE_FOR_xop_vpermil2v8sf3:
26224 error ("the last argument must be a 2-bit immediate");
26225 return gen_reg_rtx (tmode);
26226 case CODE_FOR_xop_rotlv2di3:
26227 new_icode = CODE_FOR_rotlv2di3;
26229 case CODE_FOR_xop_rotlv4si3:
26230 new_icode = CODE_FOR_rotlv4si3;
26232 case CODE_FOR_xop_rotlv8hi3:
26233 new_icode = CODE_FOR_rotlv8hi3;
26235 case CODE_FOR_xop_rotlv16qi3:
26236 new_icode = CODE_FOR_rotlv16qi3;
26238 if (CONST_INT_P (op))
26240 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
26241 op = GEN_INT (INTVAL (op) & mask);
26242 gcc_checking_assert
26243 (insn_data[icode].operand[i + 1].predicate (op, mode));
26247 gcc_checking_assert
26249 && insn_data[new_icode].operand[0].mode == tmode
26250 && insn_data[new_icode].operand[1].mode == tmode
26251 && insn_data[new_icode].operand[2].mode == mode
26252 && insn_data[new_icode].operand[0].predicate
26253 == insn_data[icode].operand[0].predicate
26254 && insn_data[new_icode].operand[1].predicate
26255 == insn_data[icode].operand[1].predicate);
26261 gcc_unreachable ();
26268 if (VECTOR_MODE_P (mode))
26269 op = safe_vector_operand (op, mode);
26271 /* If we aren't optimizing, only allow one memory operand to be generated. */
26273 if (memory_operand (op, mode))
26276 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
26279 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
26281 op = force_reg (mode, op);
26285 args[i].mode = mode;
26291 pat = GEN_FCN (icode) (target, args[0].op);
26296 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
26297 GEN_INT ((int)sub_code));
26298 else if (! comparison_p)
26299 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26302 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
26306 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
26311 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26315 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
26319 gcc_unreachable ();
26329 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
26330 insns with vec_merge. */
26333 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
26337 tree arg0 = CALL_EXPR_ARG (exp, 0);
26338 rtx op1, op0 = expand_normal (arg0);
26339 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26340 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26342 if (optimize || !target
26343 || GET_MODE (target) != tmode
26344 || !insn_data[icode].operand[0].predicate (target, tmode))
26345 target = gen_reg_rtx (tmode);
26347 if (VECTOR_MODE_P (mode0))
26348 op0 = safe_vector_operand (op0, mode0);
26350 if ((optimize && !register_operand (op0, mode0))
26351 || !insn_data[icode].operand[1].predicate (op0, mode0))
26352 op0 = copy_to_mode_reg (mode0, op0);
26355 if (!insn_data[icode].operand[2].predicate (op1, mode0))
26356 op1 = copy_to_mode_reg (mode0, op1);
26358 pat = GEN_FCN (icode) (target, op0, op1);
26365 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
26368 ix86_expand_sse_compare (const struct builtin_description *d,
26369 tree exp, rtx target, bool swap)
26372 tree arg0 = CALL_EXPR_ARG (exp, 0);
26373 tree arg1 = CALL_EXPR_ARG (exp, 1);
26374 rtx op0 = expand_normal (arg0);
26375 rtx op1 = expand_normal (arg1);
26377 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26378 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26379 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
26380 enum rtx_code comparison = d->comparison;
26382 if (VECTOR_MODE_P (mode0))
26383 op0 = safe_vector_operand (op0, mode0);
26384 if (VECTOR_MODE_P (mode1))
26385 op1 = safe_vector_operand (op1, mode1);
26387 /* Swap operands if we have a comparison that isn't available in
26388    SSE. */
26391 rtx tmp = gen_reg_rtx (mode1);
26392 emit_move_insn (tmp, op1);
26397 if (optimize || !target
26398 || GET_MODE (target) != tmode
26399 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26400 target = gen_reg_rtx (tmode);
26402 if ((optimize && !register_operand (op0, mode0))
26403 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
26404 op0 = copy_to_mode_reg (mode0, op0);
26405 if ((optimize && !register_operand (op1, mode1))
26406 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
26407 op1 = copy_to_mode_reg (mode1, op1);
26409 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
26410 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
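/* A minimal user-level sketch of a builtin routed through
   ix86_expand_sse_compare (assumes <xmmintrin.h> semantics; not
   compiled as part of this file).  */
#if 0
#include <xmmintrin.h>

__m128
mask_lt (__m128 a, __m128 b)
{
  /* The comparison rtx built above becomes operand 3 of the cmpps
     pattern; lanes where the comparison holds are set to all ones.  */
  return _mm_cmplt_ps (a, b);
}
#endif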
26417 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
26420 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
26424 tree arg0 = CALL_EXPR_ARG (exp, 0);
26425 tree arg1 = CALL_EXPR_ARG (exp, 1);
26426 rtx op0 = expand_normal (arg0);
26427 rtx op1 = expand_normal (arg1);
26428 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26429 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26430 enum rtx_code comparison = d->comparison;
26432 if (VECTOR_MODE_P (mode0))
26433 op0 = safe_vector_operand (op0, mode0);
26434 if (VECTOR_MODE_P (mode1))
26435 op1 = safe_vector_operand (op1, mode1);
26437 /* Swap operands if we have a comparison that isn't available in
26438    SSE. */
26439 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
26446 target = gen_reg_rtx (SImode);
26447 emit_move_insn (target, const0_rtx);
26448 target = gen_rtx_SUBREG (QImode, target, 0);
26450 if ((optimize && !register_operand (op0, mode0))
26451 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26452 op0 = copy_to_mode_reg (mode0, op0);
26453 if ((optimize && !register_operand (op1, mode1))
26454 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26455 op1 = copy_to_mode_reg (mode1, op1);
26457 pat = GEN_FCN (d->icode) (op0, op1);
26461 emit_insn (gen_rtx_SET (VOIDmode,
26462 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26463 gen_rtx_fmt_ee (comparison, QImode,
26464 SET_DEST (pat),
26465 const0_rtx)));
26467 return SUBREG_REG (target);
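/* A minimal user-level sketch of a comi builtin (assumes <emmintrin.h>;
   not compiled as part of this file).  */
#if 0
#include <emmintrin.h>

int
double_gt (double x, double y)
{
  /* Expands through ix86_expand_sse_comi: ucomisd sets EFLAGS and the
     flag is written into the low byte of a zeroed SImode register.  */
  return _mm_comigt_sd (_mm_set_sd (x), _mm_set_sd (y));
}
#endif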
26470 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
26473 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
26477 tree arg0 = CALL_EXPR_ARG (exp, 0);
26478 rtx op1, op0 = expand_normal (arg0);
26479 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26480 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26482 if (optimize || target == 0
26483 || GET_MODE (target) != tmode
26484 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26485 target = gen_reg_rtx (tmode);
26487 if (VECTOR_MODE_P (mode0))
26488 op0 = safe_vector_operand (op0, mode0);
26490 if ((optimize && !register_operand (op0, mode0))
26491 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
26492 op0 = copy_to_mode_reg (mode0, op0);
26494 op1 = GEN_INT (d->comparison);
26496 pat = GEN_FCN (d->icode) (target, op0, op1);
26503 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
26506 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
26510 tree arg0 = CALL_EXPR_ARG (exp, 0);
26511 tree arg1 = CALL_EXPR_ARG (exp, 1);
26512 rtx op0 = expand_normal (arg0);
26513 rtx op1 = expand_normal (arg1);
26514 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26515 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26516 enum rtx_code comparison = d->comparison;
26518 if (VECTOR_MODE_P (mode0))
26519 op0 = safe_vector_operand (op0, mode0);
26520 if (VECTOR_MODE_P (mode1))
26521 op1 = safe_vector_operand (op1, mode1);
26523 target = gen_reg_rtx (SImode);
26524 emit_move_insn (target, const0_rtx);
26525 target = gen_rtx_SUBREG (QImode, target, 0);
26527 if ((optimize && !register_operand (op0, mode0))
26528 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26529 op0 = copy_to_mode_reg (mode0, op0);
26530 if ((optimize && !register_operand (op1, mode1))
26531 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26532 op1 = copy_to_mode_reg (mode1, op1);
26534 pat = GEN_FCN (d->icode) (op0, op1);
26538 emit_insn (gen_rtx_SET (VOIDmode,
26539 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26540 gen_rtx_fmt_ee (comparison, QImode,
26541 SET_DEST (pat),
26542 const0_rtx)));
26544 return SUBREG_REG (target);
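/* A minimal user-level sketch of a ptest builtin (assumes SSE4.1 and
   <smmintrin.h>; not compiled as part of this file).  */
#if 0
#include <smmintrin.h>

int
all_zero (__m128i v)
{
  /* ptest sets ZF when the AND of its operands is zero; the flag is
     materialized exactly as in the comi case above.  */
  return _mm_testz_si128 (v, v);
}
#endif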
26547 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26550 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26551 tree exp, rtx target)
26554 tree arg0 = CALL_EXPR_ARG (exp, 0);
26555 tree arg1 = CALL_EXPR_ARG (exp, 1);
26556 tree arg2 = CALL_EXPR_ARG (exp, 2);
26557 tree arg3 = CALL_EXPR_ARG (exp, 3);
26558 tree arg4 = CALL_EXPR_ARG (exp, 4);
26559 rtx scratch0, scratch1;
26560 rtx op0 = expand_normal (arg0);
26561 rtx op1 = expand_normal (arg1);
26562 rtx op2 = expand_normal (arg2);
26563 rtx op3 = expand_normal (arg3);
26564 rtx op4 = expand_normal (arg4);
26565 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26567 tmode0 = insn_data[d->icode].operand[0].mode;
26568 tmode1 = insn_data[d->icode].operand[1].mode;
26569 modev2 = insn_data[d->icode].operand[2].mode;
26570 modei3 = insn_data[d->icode].operand[3].mode;
26571 modev4 = insn_data[d->icode].operand[4].mode;
26572 modei5 = insn_data[d->icode].operand[5].mode;
26573 modeimm = insn_data[d->icode].operand[6].mode;
26575 if (VECTOR_MODE_P (modev2))
26576 op0 = safe_vector_operand (op0, modev2);
26577 if (VECTOR_MODE_P (modev4))
26578 op2 = safe_vector_operand (op2, modev4);
26580 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26581 op0 = copy_to_mode_reg (modev2, op0);
26582 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26583 op1 = copy_to_mode_reg (modei3, op1);
26584 if ((optimize && !register_operand (op2, modev4))
26585 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26586 op2 = copy_to_mode_reg (modev4, op2);
26587 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26588 op3 = copy_to_mode_reg (modei5, op3);
26590 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26592 error ("the fifth argument must be an 8-bit immediate");
26596 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26598 if (optimize || !target
26599 || GET_MODE (target) != tmode0
26600 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26601 target = gen_reg_rtx (tmode0);
26603 scratch1 = gen_reg_rtx (tmode1);
26605 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26607 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26609 if (optimize || !target
26610 || GET_MODE (target) != tmode1
26611 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26612 target = gen_reg_rtx (tmode1);
26614 scratch0 = gen_reg_rtx (tmode0);
26616 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26620 gcc_assert (d->flag);
26622 scratch0 = gen_reg_rtx (tmode0);
26623 scratch1 = gen_reg_rtx (tmode1);
26625 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26635 target = gen_reg_rtx (SImode);
26636 emit_move_insn (target, const0_rtx);
26637 target = gen_rtx_SUBREG (QImode, target, 0);
26640 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26641 gen_rtx_fmt_ee (EQ, QImode,
26642 gen_rtx_REG ((enum machine_mode) d->flag,
26643 FLAGS_REG),
26644 const0_rtx)));
26645 return SUBREG_REG (target);
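/* A minimal user-level sketch of a pcmpestr builtin (assumes SSE4.2 and
   <nmmintrin.h>; not compiled as part of this file).  The last argument
   is the 8-bit control immediate validated above.  */
#if 0
#include <nmmintrin.h>

int
first_match (__m128i needle, int nlen, __m128i hay, int hlen)
{
  return _mm_cmpestri (needle, nlen, hay, hlen,
		       _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
}
#endif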
26652 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26655 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26656 tree exp, rtx target)
26659 tree arg0 = CALL_EXPR_ARG (exp, 0);
26660 tree arg1 = CALL_EXPR_ARG (exp, 1);
26661 tree arg2 = CALL_EXPR_ARG (exp, 2);
26662 rtx scratch0, scratch1;
26663 rtx op0 = expand_normal (arg0);
26664 rtx op1 = expand_normal (arg1);
26665 rtx op2 = expand_normal (arg2);
26666 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26668 tmode0 = insn_data[d->icode].operand[0].mode;
26669 tmode1 = insn_data[d->icode].operand[1].mode;
26670 modev2 = insn_data[d->icode].operand[2].mode;
26671 modev3 = insn_data[d->icode].operand[3].mode;
26672 modeimm = insn_data[d->icode].operand[4].mode;
26674 if (VECTOR_MODE_P (modev2))
26675 op0 = safe_vector_operand (op0, modev2);
26676 if (VECTOR_MODE_P (modev3))
26677 op1 = safe_vector_operand (op1, modev3);
26679 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26680 op0 = copy_to_mode_reg (modev2, op0);
26681 if ((optimize && !register_operand (op1, modev3))
26682 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26683 op1 = copy_to_mode_reg (modev3, op1);
26685 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26687 error ("the third argument must be an 8-bit immediate");
26691 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26693 if (optimize || !target
26694 || GET_MODE (target) != tmode0
26695 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26696 target = gen_reg_rtx (tmode0);
26698 scratch1 = gen_reg_rtx (tmode1);
26700 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26702 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26704 if (optimize || !target
26705 || GET_MODE (target) != tmode1
26706 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26707 target = gen_reg_rtx (tmode1);
26709 scratch0 = gen_reg_rtx (tmode0);
26711 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26715 gcc_assert (d->flag);
26717 scratch0 = gen_reg_rtx (tmode0);
26718 scratch1 = gen_reg_rtx (tmode1);
26720 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26730 target = gen_reg_rtx (SImode);
26731 emit_move_insn (target, const0_rtx);
26732 target = gen_rtx_SUBREG (QImode, target, 0);
26735 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26736 gen_rtx_fmt_ee (EQ, QImode,
26737 gen_rtx_REG ((enum machine_mode) d->flag,
26738 FLAGS_REG),
26739 const0_rtx)));
26740 return SUBREG_REG (target);
26746 /* Subroutine of ix86_expand_builtin to take care of insns with
26747 variable number of operands. */
26750 ix86_expand_args_builtin (const struct builtin_description *d,
26751 tree exp, rtx target)
26753 rtx pat, real_target;
26754 unsigned int i, nargs;
26755 unsigned int nargs_constant = 0;
26756 int num_memory = 0;
26760 enum machine_mode mode;
26762 bool last_arg_count = false;
26763 enum insn_code icode = d->icode;
26764 const struct insn_data_d *insn_p = &insn_data[icode];
26765 enum machine_mode tmode = insn_p->operand[0].mode;
26766 enum machine_mode rmode = VOIDmode;
26768 enum rtx_code comparison = d->comparison;
26770 switch ((enum ix86_builtin_func_type) d->flag)
26772 case V2DF_FTYPE_V2DF_ROUND:
26773 case V4DF_FTYPE_V4DF_ROUND:
26774 case V4SF_FTYPE_V4SF_ROUND:
26775 case V8SF_FTYPE_V8SF_ROUND:
26776 return ix86_expand_sse_round (d, exp, target);
26777 case INT_FTYPE_V8SF_V8SF_PTEST:
26778 case INT_FTYPE_V4DI_V4DI_PTEST:
26779 case INT_FTYPE_V4DF_V4DF_PTEST:
26780 case INT_FTYPE_V4SF_V4SF_PTEST:
26781 case INT_FTYPE_V2DI_V2DI_PTEST:
26782 case INT_FTYPE_V2DF_V2DF_PTEST:
26783 return ix86_expand_sse_ptest (d, exp, target);
26784 case FLOAT128_FTYPE_FLOAT128:
26785 case FLOAT_FTYPE_FLOAT:
26786 case INT_FTYPE_INT:
26787 case UINT64_FTYPE_INT:
26788 case UINT16_FTYPE_UINT16:
26789 case INT64_FTYPE_INT64:
26790 case INT64_FTYPE_V4SF:
26791 case INT64_FTYPE_V2DF:
26792 case INT_FTYPE_V16QI:
26793 case INT_FTYPE_V8QI:
26794 case INT_FTYPE_V8SF:
26795 case INT_FTYPE_V4DF:
26796 case INT_FTYPE_V4SF:
26797 case INT_FTYPE_V2DF:
26798 case INT_FTYPE_V32QI:
26799 case V16QI_FTYPE_V16QI:
26800 case V8SI_FTYPE_V8SF:
26801 case V8SI_FTYPE_V4SI:
26802 case V8HI_FTYPE_V8HI:
26803 case V8HI_FTYPE_V16QI:
26804 case V8QI_FTYPE_V8QI:
26805 case V8SF_FTYPE_V8SF:
26806 case V8SF_FTYPE_V8SI:
26807 case V8SF_FTYPE_V4SF:
26808 case V8SF_FTYPE_V8HI:
26809 case V4SI_FTYPE_V4SI:
26810 case V4SI_FTYPE_V16QI:
26811 case V4SI_FTYPE_V4SF:
26812 case V4SI_FTYPE_V8SI:
26813 case V4SI_FTYPE_V8HI:
26814 case V4SI_FTYPE_V4DF:
26815 case V4SI_FTYPE_V2DF:
26816 case V4HI_FTYPE_V4HI:
26817 case V4DF_FTYPE_V4DF:
26818 case V4DF_FTYPE_V4SI:
26819 case V4DF_FTYPE_V4SF:
26820 case V4DF_FTYPE_V2DF:
26821 case V4SF_FTYPE_V4SF:
26822 case V4SF_FTYPE_V4SI:
26823 case V4SF_FTYPE_V8SF:
26824 case V4SF_FTYPE_V4DF:
26825 case V4SF_FTYPE_V8HI:
26826 case V4SF_FTYPE_V2DF:
26827 case V2DI_FTYPE_V2DI:
26828 case V2DI_FTYPE_V16QI:
26829 case V2DI_FTYPE_V8HI:
26830 case V2DI_FTYPE_V4SI:
26831 case V2DF_FTYPE_V2DF:
26832 case V2DF_FTYPE_V4SI:
26833 case V2DF_FTYPE_V4DF:
26834 case V2DF_FTYPE_V4SF:
26835 case V2DF_FTYPE_V2SI:
26836 case V2SI_FTYPE_V2SI:
26837 case V2SI_FTYPE_V4SF:
26838 case V2SI_FTYPE_V2SF:
26839 case V2SI_FTYPE_V2DF:
26840 case V2SF_FTYPE_V2SF:
26841 case V2SF_FTYPE_V2SI:
26842 case V32QI_FTYPE_V32QI:
26843 case V32QI_FTYPE_V16QI:
26844 case V16HI_FTYPE_V16HI:
26845 case V16HI_FTYPE_V8HI:
26846 case V8SI_FTYPE_V8SI:
26847 case V16HI_FTYPE_V16QI:
26848 case V8SI_FTYPE_V16QI:
26849 case V4DI_FTYPE_V16QI:
26850 case V8SI_FTYPE_V8HI:
26851 case V4DI_FTYPE_V8HI:
26852 case V4DI_FTYPE_V4SI:
26853 case V4DI_FTYPE_V2DI:
26856 case V4SF_FTYPE_V4SF_VEC_MERGE:
26857 case V2DF_FTYPE_V2DF_VEC_MERGE:
26858 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26859 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26860 case V16QI_FTYPE_V16QI_V16QI:
26861 case V16QI_FTYPE_V8HI_V8HI:
26862 case V8QI_FTYPE_V8QI_V8QI:
26863 case V8QI_FTYPE_V4HI_V4HI:
26864 case V8HI_FTYPE_V8HI_V8HI:
26865 case V8HI_FTYPE_V16QI_V16QI:
26866 case V8HI_FTYPE_V4SI_V4SI:
26867 case V8SF_FTYPE_V8SF_V8SF:
26868 case V8SF_FTYPE_V8SF_V8SI:
26869 case V4SI_FTYPE_V4SI_V4SI:
26870 case V4SI_FTYPE_V8HI_V8HI:
26871 case V4SI_FTYPE_V4SF_V4SF:
26872 case V4SI_FTYPE_V2DF_V2DF:
26873 case V4HI_FTYPE_V4HI_V4HI:
26874 case V4HI_FTYPE_V8QI_V8QI:
26875 case V4HI_FTYPE_V2SI_V2SI:
26876 case V4DF_FTYPE_V4DF_V4DF:
26877 case V4DF_FTYPE_V4DF_V4DI:
26878 case V4SF_FTYPE_V4SF_V4SF:
26879 case V4SF_FTYPE_V4SF_V4SI:
26880 case V4SF_FTYPE_V4SF_V2SI:
26881 case V4SF_FTYPE_V4SF_V2DF:
26882 case V4SF_FTYPE_V4SF_DI:
26883 case V4SF_FTYPE_V4SF_SI:
26884 case V2DI_FTYPE_V2DI_V2DI:
26885 case V2DI_FTYPE_V16QI_V16QI:
26886 case V2DI_FTYPE_V4SI_V4SI:
26887 case V2DI_FTYPE_V2DI_V16QI:
26888 case V2DI_FTYPE_V2DF_V2DF:
26889 case V2SI_FTYPE_V2SI_V2SI:
26890 case V2SI_FTYPE_V4HI_V4HI:
26891 case V2SI_FTYPE_V2SF_V2SF:
26892 case V2DF_FTYPE_V2DF_V2DF:
26893 case V2DF_FTYPE_V2DF_V4SF:
26894 case V2DF_FTYPE_V2DF_V2DI:
26895 case V2DF_FTYPE_V2DF_DI:
26896 case V2DF_FTYPE_V2DF_SI:
26897 case V2SF_FTYPE_V2SF_V2SF:
26898 case V1DI_FTYPE_V1DI_V1DI:
26899 case V1DI_FTYPE_V8QI_V8QI:
26900 case V1DI_FTYPE_V2SI_V2SI:
26901 case V32QI_FTYPE_V16HI_V16HI:
26902 case V16HI_FTYPE_V8SI_V8SI:
26903 case V32QI_FTYPE_V32QI_V32QI:
26904 case V16HI_FTYPE_V32QI_V32QI:
26905 case V16HI_FTYPE_V16HI_V16HI:
26906 case V8SI_FTYPE_V8SI_V8SI:
26907 case V8SI_FTYPE_V16HI_V16HI:
26908 case V4DI_FTYPE_V4DI_V4DI:
26909 case V4DI_FTYPE_V8SI_V8SI:
26910 if (comparison == UNKNOWN)
26911 return ix86_expand_binop_builtin (icode, exp, target);
26914 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26915 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26916 gcc_assert (comparison != UNKNOWN);
26920 case V16HI_FTYPE_V16HI_V8HI_COUNT:
26921 case V16HI_FTYPE_V16HI_SI_COUNT:
26922 case V8SI_FTYPE_V8SI_V4SI_COUNT:
26923 case V8SI_FTYPE_V8SI_SI_COUNT:
26924 case V4DI_FTYPE_V4DI_V2DI_COUNT:
26925 case V4DI_FTYPE_V4DI_INT_COUNT:
26926 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26927 case V8HI_FTYPE_V8HI_SI_COUNT:
26928 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26929 case V4SI_FTYPE_V4SI_SI_COUNT:
26930 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26931 case V4HI_FTYPE_V4HI_SI_COUNT:
26932 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26933 case V2DI_FTYPE_V2DI_SI_COUNT:
26934 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26935 case V2SI_FTYPE_V2SI_SI_COUNT:
26936 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26937 case V1DI_FTYPE_V1DI_SI_COUNT:
26939 last_arg_count = true;
26941 case UINT64_FTYPE_UINT64_UINT64:
26942 case UINT_FTYPE_UINT_UINT:
26943 case UINT_FTYPE_UINT_USHORT:
26944 case UINT_FTYPE_UINT_UCHAR:
26945 case UINT16_FTYPE_UINT16_INT:
26946 case UINT8_FTYPE_UINT8_INT:
26949 case V2DI_FTYPE_V2DI_INT_CONVERT:
26952 nargs_constant = 1;
26954 case V8HI_FTYPE_V8HI_INT:
26955 case V8HI_FTYPE_V8SF_INT:
26956 case V8HI_FTYPE_V4SF_INT:
26957 case V8SF_FTYPE_V8SF_INT:
26958 case V4SI_FTYPE_V4SI_INT:
26959 case V4SI_FTYPE_V8SI_INT:
26960 case V4HI_FTYPE_V4HI_INT:
26961 case V4DF_FTYPE_V4DF_INT:
26962 case V4SF_FTYPE_V4SF_INT:
26963 case V4SF_FTYPE_V8SF_INT:
26964 case V2DI_FTYPE_V2DI_INT:
26965 case V2DF_FTYPE_V2DF_INT:
26966 case V2DF_FTYPE_V4DF_INT:
26967 case V16HI_FTYPE_V16HI_INT:
26968 case V8SI_FTYPE_V8SI_INT:
26969 case V4DI_FTYPE_V4DI_INT:
26970 case V2DI_FTYPE_V4DI_INT:
26972 nargs_constant = 1;
26974 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26975 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26976 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26977 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26978 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26979 case V32QI_FTYPE_V32QI_V32QI_V32QI:
26982 case V32QI_FTYPE_V32QI_V32QI_INT:
26983 case V16HI_FTYPE_V16HI_V16HI_INT:
26984 case V16QI_FTYPE_V16QI_V16QI_INT:
26985 case V4DI_FTYPE_V4DI_V4DI_INT:
26986 case V8HI_FTYPE_V8HI_V8HI_INT:
26987 case V8SI_FTYPE_V8SI_V8SI_INT:
26988 case V8SI_FTYPE_V8SI_V4SI_INT:
26989 case V8SF_FTYPE_V8SF_V8SF_INT:
26990 case V8SF_FTYPE_V8SF_V4SF_INT:
26991 case V4SI_FTYPE_V4SI_V4SI_INT:
26992 case V4DF_FTYPE_V4DF_V4DF_INT:
26993 case V4DF_FTYPE_V4DF_V2DF_INT:
26994 case V4SF_FTYPE_V4SF_V4SF_INT:
26995 case V2DI_FTYPE_V2DI_V2DI_INT:
26996 case V4DI_FTYPE_V4DI_V2DI_INT:
26997 case V2DF_FTYPE_V2DF_V2DF_INT:
26999 nargs_constant = 1;
27001 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
27004 nargs_constant = 1;
27006 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
27009 nargs_constant = 1;
27011 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
27014 nargs_constant = 1;
27016 case V2DI_FTYPE_V2DI_UINT_UINT:
27018 nargs_constant = 2;
27020 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
27021 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
27022 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
27023 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
27025 nargs_constant = 1;
27027 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
27029 nargs_constant = 2;
27032 gcc_unreachable ();
27035 gcc_assert (nargs <= ARRAY_SIZE (args));
27037 if (comparison != UNKNOWN)
27039 gcc_assert (nargs == 2);
27040 return ix86_expand_sse_compare (d, exp, target, swap);
27043 if (rmode == VOIDmode || rmode == tmode)
27045 if (optimize
27046 || target == 0
27047 || GET_MODE (target) != tmode
27048 || !insn_p->operand[0].predicate (target, tmode))
27049 target = gen_reg_rtx (tmode);
27050 real_target = target;
27054 target = gen_reg_rtx (rmode);
27055 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
27058 for (i = 0; i < nargs; i++)
27060 tree arg = CALL_EXPR_ARG (exp, i);
27061 rtx op = expand_normal (arg);
27062 enum machine_mode mode = insn_p->operand[i + 1].mode;
27063 bool match = insn_p->operand[i + 1].predicate (op, mode);
27065 if (last_arg_count && (i + 1) == nargs)
27067 /* SIMD shift insns take either an 8-bit immediate or a
27068    register as the count.  But the builtin functions take an int
27069    as the count; if the count doesn't match, we put it in a register. */
27072 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
27073 if (!insn_p->operand[i + 1].predicate (op, mode))
27074 op = copy_to_reg (op);
27077 else if ((nargs - i) <= nargs_constant)
27082 case CODE_FOR_avx2_inserti128:
27083 case CODE_FOR_avx2_extracti128:
27084 error ("the last argument must be a 1-bit immediate");
27087 case CODE_FOR_sse4_1_roundpd:
27088 case CODE_FOR_sse4_1_roundps:
27089 case CODE_FOR_sse4_1_roundsd:
27090 case CODE_FOR_sse4_1_roundss:
27091 case CODE_FOR_sse4_1_blendps:
27092 case CODE_FOR_avx_blendpd256:
27093 case CODE_FOR_avx_vpermilv4df:
27094 case CODE_FOR_avx_roundpd256:
27095 case CODE_FOR_avx_roundps256:
27096 error ("the last argument must be a 4-bit immediate");
27099 case CODE_FOR_sse4_1_blendpd:
27100 case CODE_FOR_avx_vpermilv2df:
27101 case CODE_FOR_xop_vpermil2v2df3:
27102 case CODE_FOR_xop_vpermil2v4sf3:
27103 case CODE_FOR_xop_vpermil2v4df3:
27104 case CODE_FOR_xop_vpermil2v8sf3:
27105 error ("the last argument must be a 2-bit immediate");
27108 case CODE_FOR_avx_vextractf128v4df:
27109 case CODE_FOR_avx_vextractf128v8sf:
27110 case CODE_FOR_avx_vextractf128v8si:
27111 case CODE_FOR_avx_vinsertf128v4df:
27112 case CODE_FOR_avx_vinsertf128v8sf:
27113 case CODE_FOR_avx_vinsertf128v8si:
27114 error ("the last argument must be a 1-bit immediate");
27117 case CODE_FOR_avx_vmcmpv2df3:
27118 case CODE_FOR_avx_vmcmpv4sf3:
27119 case CODE_FOR_avx_cmpv2df3:
27120 case CODE_FOR_avx_cmpv4sf3:
27121 case CODE_FOR_avx_cmpv4df3:
27122 case CODE_FOR_avx_cmpv8sf3:
27123 error ("the last argument must be a 5-bit immediate");
27127 switch (nargs_constant)
27130 if ((nargs - i) == nargs_constant)
27132 error ("the next to last argument must be an 8-bit immediate");
27136 error ("the last argument must be an 8-bit immediate");
27139 gcc_unreachable ();
27146 if (VECTOR_MODE_P (mode))
27147 op = safe_vector_operand (op, mode);
27149 /* If we aren't optimizing, only allow one memory operand to
27150    be generated. */
27151 if (memory_operand (op, mode))
27154 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
27156 if (optimize || !match || num_memory > 1)
27157 op = copy_to_mode_reg (mode, op);
27161 op = copy_to_reg (op);
27162 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
27167 args[i].mode = mode;
27173 pat = GEN_FCN (icode) (real_target, args[0].op);
27176 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
27179 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27183 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27184 args[2].op, args[3].op);
27187 gcc_unreachable ();
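/* A minimal user-level sketch of the COUNT handling above (assumes
   <emmintrin.h>; not compiled as part of this file): the shift builtins
   take an int count, so a constant satisfying the predicate is emitted
   as an immediate, while anything else is copied into a register.  */
#if 0
#include <emmintrin.h>

__m128i
shl_const (__m128i v)	   { return _mm_slli_epi32 (v, 3); } /* immediate */

__m128i
shl_var (__m128i v, int n) { return _mm_slli_epi32 (v, n); } /* register */
#endif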
27197 /* Subroutine of ix86_expand_builtin to take care of special insns
27198 with variable number of operands. */
27201 ix86_expand_special_args_builtin (const struct builtin_description *d,
27202 tree exp, rtx target)
27206 unsigned int i, nargs, arg_adjust, memory;
27210 enum machine_mode mode;
27212 enum insn_code icode = d->icode;
27213 bool last_arg_constant = false;
27214 const struct insn_data_d *insn_p = &insn_data[icode];
27215 enum machine_mode tmode = insn_p->operand[0].mode;
27216 enum { load, store } klass;
27218 switch ((enum ix86_builtin_func_type) d->flag)
27220 case VOID_FTYPE_VOID:
27221 if (icode == CODE_FOR_avx_vzeroupper)
27222 target = GEN_INT (vzeroupper_intrinsic);
27223 emit_insn (GEN_FCN (icode) (target));
27225 case VOID_FTYPE_UINT64:
27226 case VOID_FTYPE_UNSIGNED:
27232 case UINT64_FTYPE_VOID:
27233 case UNSIGNED_FTYPE_VOID:
27238 case UINT64_FTYPE_PUNSIGNED:
27239 case V2DI_FTYPE_PV2DI:
27240 case V4DI_FTYPE_PV4DI:
27241 case V32QI_FTYPE_PCCHAR:
27242 case V16QI_FTYPE_PCCHAR:
27243 case V8SF_FTYPE_PCV4SF:
27244 case V8SF_FTYPE_PCFLOAT:
27245 case V4SF_FTYPE_PCFLOAT:
27246 case V4DF_FTYPE_PCV2DF:
27247 case V4DF_FTYPE_PCDOUBLE:
27248 case V2DF_FTYPE_PCDOUBLE:
27249 case VOID_FTYPE_PVOID:
27254 case VOID_FTYPE_PV2SF_V4SF:
27255 case VOID_FTYPE_PV4DI_V4DI:
27256 case VOID_FTYPE_PV2DI_V2DI:
27257 case VOID_FTYPE_PCHAR_V32QI:
27258 case VOID_FTYPE_PCHAR_V16QI:
27259 case VOID_FTYPE_PFLOAT_V8SF:
27260 case VOID_FTYPE_PFLOAT_V4SF:
27261 case VOID_FTYPE_PDOUBLE_V4DF:
27262 case VOID_FTYPE_PDOUBLE_V2DF:
27263 case VOID_FTYPE_PULONGLONG_ULONGLONG:
27264 case VOID_FTYPE_PINT_INT:
27267 /* Reserve memory operand for target. */
27268 memory = ARRAY_SIZE (args);
27270 case V4SF_FTYPE_V4SF_PCV2SF:
27271 case V2DF_FTYPE_V2DF_PCDOUBLE:
27276 case V8SF_FTYPE_PCV8SF_V8SI:
27277 case V4DF_FTYPE_PCV4DF_V4DI:
27278 case V4SF_FTYPE_PCV4SF_V4SI:
27279 case V2DF_FTYPE_PCV2DF_V2DI:
27280 case V8SI_FTYPE_PCV8SI_V8SI:
27281 case V4DI_FTYPE_PCV4DI_V4DI:
27282 case V4SI_FTYPE_PCV4SI_V4SI:
27283 case V2DI_FTYPE_PCV2DI_V2DI:
27288 case VOID_FTYPE_PV8SF_V8SI_V8SF:
27289 case VOID_FTYPE_PV4DF_V4DI_V4DF:
27290 case VOID_FTYPE_PV4SF_V4SI_V4SF:
27291 case VOID_FTYPE_PV2DF_V2DI_V2DF:
27292 case VOID_FTYPE_PV8SI_V8SI_V8SI:
27293 case VOID_FTYPE_PV4DI_V4DI_V4DI:
27294 case VOID_FTYPE_PV4SI_V4SI_V4SI:
27295 case VOID_FTYPE_PV2DI_V2DI_V2DI:
27298 /* Reserve memory operand for target. */
27299 memory = ARRAY_SIZE (args);
27301 case VOID_FTYPE_UINT_UINT_UINT:
27302 case VOID_FTYPE_UINT64_UINT_UINT:
27303 case UCHAR_FTYPE_UINT_UINT_UINT:
27304 case UCHAR_FTYPE_UINT64_UINT_UINT:
27307 memory = ARRAY_SIZE (args);
27308 last_arg_constant = true;
27311 gcc_unreachable ();
27314 gcc_assert (nargs <= ARRAY_SIZE (args));
27316 if (klass == store)
27318 arg = CALL_EXPR_ARG (exp, 0);
27319 op = expand_normal (arg);
27320 gcc_assert (target == 0);
27323 if (GET_MODE (op) != Pmode)
27324 op = convert_to_mode (Pmode, op, 1);
27325 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
27328 target = force_reg (tmode, op);
27334 if (optimize
27335 || target == 0
27336 || GET_MODE (target) != tmode
27337 || !insn_p->operand[0].predicate (target, tmode))
27338 target = gen_reg_rtx (tmode);
27341 for (i = 0; i < nargs; i++)
27343 enum machine_mode mode = insn_p->operand[i + 1].mode;
27346 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
27347 op = expand_normal (arg);
27348 match = insn_p->operand[i + 1].predicate (op, mode);
27350 if (last_arg_constant && (i + 1) == nargs)
27354 if (icode == CODE_FOR_lwp_lwpvalsi3
27355 || icode == CODE_FOR_lwp_lwpinssi3
27356 || icode == CODE_FOR_lwp_lwpvaldi3
27357 || icode == CODE_FOR_lwp_lwpinsdi3)
27358 error ("the last argument must be a 32-bit immediate");
27360 error ("the last argument must be an 8-bit immediate");
27368 /* This must be the memory operand. */
27369 if (GET_MODE (op) != Pmode)
27370 op = convert_to_mode (Pmode, op, 1);
27371 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
27372 gcc_assert (GET_MODE (op) == mode
27373 || GET_MODE (op) == VOIDmode);
27377 /* This must be a register. */
27378 if (VECTOR_MODE_P (mode))
27379 op = safe_vector_operand (op, mode);
27381 gcc_assert (GET_MODE (op) == mode
27382 || GET_MODE (op) == VOIDmode);
27383 op = copy_to_mode_reg (mode, op);
27388 args[i].mode = mode;
27394 pat = GEN_FCN (icode) (target);
27397 pat = GEN_FCN (icode) (target, args[0].op);
27400 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
27403 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
27406 gcc_unreachable ();
27412 return klass == store ? 0 : target;
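/* A minimal user-level sketch of the load/store classes above (assumes
   AVX and <immintrin.h>; not compiled as part of this file).  */
#if 0
#include <immintrin.h>

void
copy256 (double *dst, const double *src)
{
  __m256d v = _mm256_loadu_pd (src);	/* klass == load: MEM is operand 1 */
  _mm256_storeu_pd (dst, v);		/* klass == store: MEM is the target */
}
#endif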
27415 /* Return the integer constant in ARG. Constrain it to be in the range
27416 of the subparts of VEC_TYPE; issue an error if not. */
27419 get_element_number (tree vec_type, tree arg)
27421 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
27423 if (!host_integerp (arg, 1)
27424 || (elt = tree_low_cst (arg, 1), elt > max))
27426 error ("selector must be an integer constant in the range 0..%wi", max);
27433 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27434 ix86_expand_vector_init. We DO have language-level syntax for this, in
27435 the form of (type){ init-list }. Except that since we can't place emms
27436 instructions from inside the compiler, we can't allow the use of MMX
27437 registers unless the user explicitly asks for it. So we do *not* define
27438 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
27439 we have builtins invoked by mmintrin.h that give us license to emit
27440 these sorts of instructions. */
27443 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
27445 enum machine_mode tmode = TYPE_MODE (type);
27446 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
27447 int i, n_elt = GET_MODE_NUNITS (tmode);
27448 rtvec v = rtvec_alloc (n_elt);
27450 gcc_assert (VECTOR_MODE_P (tmode));
27451 gcc_assert (call_expr_nargs (exp) == n_elt);
27453 for (i = 0; i < n_elt; ++i)
27455 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
27456 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
27459 if (!target || !register_operand (target, tmode))
27460 target = gen_reg_rtx (tmode);
27462 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
27466 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27467 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
27468 had a language-level syntax for referencing vector elements. */
27471 ix86_expand_vec_ext_builtin (tree exp, rtx target)
27473 enum machine_mode tmode, mode0;
27478 arg0 = CALL_EXPR_ARG (exp, 0);
27479 arg1 = CALL_EXPR_ARG (exp, 1);
27481 op0 = expand_normal (arg0);
27482 elt = get_element_number (TREE_TYPE (arg0), arg1);
27484 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27485 mode0 = TYPE_MODE (TREE_TYPE (arg0));
27486 gcc_assert (VECTOR_MODE_P (mode0));
27488 op0 = force_reg (mode0, op0);
27490 if (optimize || !target || !register_operand (target, tmode))
27491 target = gen_reg_rtx (tmode);
27493 ix86_expand_vector_extract (true, target, op0, elt);
27498 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27499 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
27500 a language-level syntax for referencing vector elements. */
27503 ix86_expand_vec_set_builtin (tree exp)
27505 enum machine_mode tmode, mode1;
27506 tree arg0, arg1, arg2;
27508 rtx op0, op1, target;
27510 arg0 = CALL_EXPR_ARG (exp, 0);
27511 arg1 = CALL_EXPR_ARG (exp, 1);
27512 arg2 = CALL_EXPR_ARG (exp, 2);
27514 tmode = TYPE_MODE (TREE_TYPE (arg0));
27515 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27516 gcc_assert (VECTOR_MODE_P (tmode));
27518 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
27519 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
27520 elt = get_element_number (TREE_TYPE (arg0), arg2);
27522 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
27523 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
27525 op0 = force_reg (tmode, op0);
27526 op1 = force_reg (mode1, op1);
27528 /* OP0 is the source of these builtin functions and shouldn't be
27529 modified. Create a copy, use it and return it as target. */
27530 target = gen_reg_rtx (tmode);
27531 emit_move_insn (target, op0);
27532 ix86_expand_vector_set (true, target, op1, elt);
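/* A minimal user-level sketch of the vec_ext/vec_set builtins expanded
   above (assumes SSE4.1 and <smmintrin.h>; not compiled as part of this
   file).  Note that the insert yields a fresh vector, matching the copy
   made above so that OP0 is never modified in place.  */
#if 0
#include <smmintrin.h>

__m128i
bump_lane2 (__m128i v)
{
  int e = _mm_extract_epi32 (v, 2);	 /* ix86_expand_vec_ext_builtin */
  return _mm_insert_epi32 (v, e + 1, 2); /* ix86_expand_vec_set_builtin */
}
#endif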
27537 /* Expand an expression EXP that calls a built-in function,
27538 with result going to TARGET if that's convenient
27539 (and in mode MODE if that's convenient).
27540 SUBTARGET may be used as the target for computing one of EXP's operands.
27541 IGNORE is nonzero if the value is to be ignored. */
27544 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
27545 enum machine_mode mode ATTRIBUTE_UNUSED,
27546 int ignore ATTRIBUTE_UNUSED)
27548 const struct builtin_description *d;
27550 enum insn_code icode;
27551 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
27552 tree arg0, arg1, arg2, arg3, arg4;
27553 rtx op0, op1, op2, op3, op4, pat;
27554 enum machine_mode mode0, mode1, mode2, mode3, mode4;
27555 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
27557 /* Determine whether the builtin function is available under the current ISA.
27558 Originally the builtin was not created if it wasn't applicable to the
27559 current ISA based on the command line switches. With function specific
27560 options, we need to check in the context of the function making the call
27561 whether it is supported. */
27562 if (ix86_builtins_isa[fcode].isa
27563 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
27565 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
27566 NULL, (enum fpmath_unit) 0, false);
27569 error ("%qE needs unknown isa option", fndecl);
27572 gcc_assert (opts != NULL);
27573 error ("%qE needs isa option %s", fndecl, opts);
27581 case IX86_BUILTIN_MASKMOVQ:
27582 case IX86_BUILTIN_MASKMOVDQU:
27583 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27584 ? CODE_FOR_mmx_maskmovq
27585 : CODE_FOR_sse2_maskmovdqu);
27586 /* Note the arg order is different from the operand order. */
27587 arg1 = CALL_EXPR_ARG (exp, 0);
27588 arg2 = CALL_EXPR_ARG (exp, 1);
27589 arg0 = CALL_EXPR_ARG (exp, 2);
27590 op0 = expand_normal (arg0);
27591 op1 = expand_normal (arg1);
27592 op2 = expand_normal (arg2);
27593 mode0 = insn_data[icode].operand[0].mode;
27594 mode1 = insn_data[icode].operand[1].mode;
27595 mode2 = insn_data[icode].operand[2].mode;
27597 if (GET_MODE (op0) != Pmode)
27598 op0 = convert_to_mode (Pmode, op0, 1);
27599 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
27601 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27602 op0 = copy_to_mode_reg (mode0, op0);
27603 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27604 op1 = copy_to_mode_reg (mode1, op1);
27605 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27606 op2 = copy_to_mode_reg (mode2, op2);
27607 pat = GEN_FCN (icode) (op0, op1, op2);
27613 case IX86_BUILTIN_LDMXCSR:
27614 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27615 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27616 emit_move_insn (target, op0);
27617 emit_insn (gen_sse_ldmxcsr (target));
27620 case IX86_BUILTIN_STMXCSR:
27621 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27622 emit_insn (gen_sse_stmxcsr (target));
27623 return copy_to_mode_reg (SImode, target);
27625 case IX86_BUILTIN_CLFLUSH:
27626 arg0 = CALL_EXPR_ARG (exp, 0);
27627 op0 = expand_normal (arg0);
27628 icode = CODE_FOR_sse2_clflush;
27629 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27631 if (GET_MODE (op0) != Pmode)
27632 op0 = convert_to_mode (Pmode, op0, 1);
27633 op0 = force_reg (Pmode, op0);
27636 emit_insn (gen_sse2_clflush (op0));
27639 case IX86_BUILTIN_MONITOR:
27640 arg0 = CALL_EXPR_ARG (exp, 0);
27641 arg1 = CALL_EXPR_ARG (exp, 1);
27642 arg2 = CALL_EXPR_ARG (exp, 2);
27643 op0 = expand_normal (arg0);
27644 op1 = expand_normal (arg1);
27645 op2 = expand_normal (arg2);
27648 if (GET_MODE (op0) != Pmode)
27649 op0 = convert_to_mode (Pmode, op0, 1);
27650 op0 = force_reg (Pmode, op0);
27653 op1 = copy_to_mode_reg (SImode, op1);
27655 op2 = copy_to_mode_reg (SImode, op2);
27656 emit_insn (ix86_gen_monitor (op0, op1, op2));
27659 case IX86_BUILTIN_MWAIT:
27660 arg0 = CALL_EXPR_ARG (exp, 0);
27661 arg1 = CALL_EXPR_ARG (exp, 1);
27662 op0 = expand_normal (arg0);
27663 op1 = expand_normal (arg1);
27665 op0 = copy_to_mode_reg (SImode, op0);
27667 op1 = copy_to_mode_reg (SImode, op1);
27668 emit_insn (gen_sse3_mwait (op0, op1));
27671 case IX86_BUILTIN_VEC_INIT_V2SI:
27672 case IX86_BUILTIN_VEC_INIT_V4HI:
27673 case IX86_BUILTIN_VEC_INIT_V8QI:
27674 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27676 case IX86_BUILTIN_VEC_EXT_V2DF:
27677 case IX86_BUILTIN_VEC_EXT_V2DI:
27678 case IX86_BUILTIN_VEC_EXT_V4SF:
27679 case IX86_BUILTIN_VEC_EXT_V4SI:
27680 case IX86_BUILTIN_VEC_EXT_V8HI:
27681 case IX86_BUILTIN_VEC_EXT_V2SI:
27682 case IX86_BUILTIN_VEC_EXT_V4HI:
27683 case IX86_BUILTIN_VEC_EXT_V16QI:
27684 return ix86_expand_vec_ext_builtin (exp, target);
27686 case IX86_BUILTIN_VEC_SET_V2DI:
27687 case IX86_BUILTIN_VEC_SET_V4SF:
27688 case IX86_BUILTIN_VEC_SET_V4SI:
27689 case IX86_BUILTIN_VEC_SET_V8HI:
27690 case IX86_BUILTIN_VEC_SET_V4HI:
27691 case IX86_BUILTIN_VEC_SET_V16QI:
27692 return ix86_expand_vec_set_builtin (exp);
27694 case IX86_BUILTIN_VEC_PERM_V2DF:
27695 case IX86_BUILTIN_VEC_PERM_V4SF:
27696 case IX86_BUILTIN_VEC_PERM_V2DI:
27697 case IX86_BUILTIN_VEC_PERM_V4SI:
27698 case IX86_BUILTIN_VEC_PERM_V8HI:
27699 case IX86_BUILTIN_VEC_PERM_V16QI:
27700 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27701 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27702 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27703 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27704 case IX86_BUILTIN_VEC_PERM_V4DF:
27705 case IX86_BUILTIN_VEC_PERM_V8SF:
27706 return ix86_expand_vec_perm_builtin (exp);
27708 case IX86_BUILTIN_INFQ:
27709 case IX86_BUILTIN_HUGE_VALQ:
27711 REAL_VALUE_TYPE inf;
27715 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27717 tmp = validize_mem (force_const_mem (mode, tmp));
27720 target = gen_reg_rtx (mode);
27722 emit_move_insn (target, tmp);
27726 case IX86_BUILTIN_LLWPCB:
27727 arg0 = CALL_EXPR_ARG (exp, 0);
27728 op0 = expand_normal (arg0);
27729 icode = CODE_FOR_lwp_llwpcb;
27730 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27732 if (GET_MODE (op0) != Pmode)
27733 op0 = convert_to_mode (Pmode, op0, 1);
27734 op0 = force_reg (Pmode, op0);
27736 emit_insn (gen_lwp_llwpcb (op0));
27739 case IX86_BUILTIN_SLWPCB:
27740 icode = CODE_FOR_lwp_slwpcb;
27741 if (!target
27742 || !insn_data[icode].operand[0].predicate (target, Pmode))
27743 target = gen_reg_rtx (Pmode);
27744 emit_insn (gen_lwp_slwpcb (target));
27747 case IX86_BUILTIN_BEXTRI32:
27748 case IX86_BUILTIN_BEXTRI64:
27749 arg0 = CALL_EXPR_ARG (exp, 0);
27750 arg1 = CALL_EXPR_ARG (exp, 1);
27751 op0 = expand_normal (arg0);
27752 op1 = expand_normal (arg1);
27753 icode = (fcode == IX86_BUILTIN_BEXTRI32
27754 ? CODE_FOR_tbm_bextri_si
27755 : CODE_FOR_tbm_bextri_di);
27756 if (!CONST_INT_P (op1))
27758 error ("last argument must be an immediate");
27763 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27764 unsigned char lsb_index = INTVAL (op1) & 0xFF;
27765 op1 = GEN_INT (length);
27766 op2 = GEN_INT (lsb_index);
27767 pat = GEN_FCN (icode) (target, op0, op1, op2);
27773 case IX86_BUILTIN_RDRAND16_STEP:
27774 icode = CODE_FOR_rdrandhi_1;
27778 case IX86_BUILTIN_RDRAND32_STEP:
27779 icode = CODE_FOR_rdrandsi_1;
27783 case IX86_BUILTIN_RDRAND64_STEP:
27784 icode = CODE_FOR_rdranddi_1;
27788 op0 = gen_reg_rtx (mode0);
27789 emit_insn (GEN_FCN (icode) (op0));
27791 arg0 = CALL_EXPR_ARG (exp, 0);
27792 op1 = expand_normal (arg0);
27793 if (!address_operand (op1, VOIDmode))
27795 op1 = convert_memory_address (Pmode, op1);
27796 op1 = copy_addr_to_reg (op1);
27798 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
27800 op1 = gen_reg_rtx (SImode);
27801 emit_move_insn (op1, CONST1_RTX (SImode));
27803 /* Emit SImode conditional move. */
27804 if (mode0 == HImode)
27806 op2 = gen_reg_rtx (SImode);
27807 emit_insn (gen_zero_extendhisi2 (op2, op0));
27809 else if (mode0 == SImode)
27812 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27815 target = gen_reg_rtx (SImode);
27817 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27818 const0_rtx);
27819 emit_insn (gen_rtx_SET (VOIDmode, target,
27820 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
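/* A minimal user-level sketch of the RDRAND step builtins expanded
   above (requires -mrdrnd; not compiled as part of this file).  The
   builtin stores the random value through its pointer argument, and the
   conditional move above turns the carry flag into a 0/1 result.  */
#if 0
unsigned int
get_random_u32 (void)
{
  unsigned int r;
  while (!__builtin_ia32_rdrand32_step (&r))
    ;				/* Retry until the hardware has entropy.  */
  return r;
}
#endif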
27823 case IX86_BUILTIN_GATHERSIV2DF:
27824 icode = CODE_FOR_avx2_gathersiv2df;
27826 case IX86_BUILTIN_GATHERSIV4DF:
27827 icode = CODE_FOR_avx2_gathersiv4df;
27829 case IX86_BUILTIN_GATHERDIV2DF:
27830 icode = CODE_FOR_avx2_gatherdiv2df;
27832 case IX86_BUILTIN_GATHERDIV4DF:
27833 icode = CODE_FOR_avx2_gatherdiv4df;
27835 case IX86_BUILTIN_GATHERSIV4SF:
27836 icode = CODE_FOR_avx2_gathersiv4sf;
27838 case IX86_BUILTIN_GATHERSIV8SF:
27839 icode = CODE_FOR_avx2_gathersiv8sf;
27841 case IX86_BUILTIN_GATHERDIV4SF:
27842 icode = CODE_FOR_avx2_gatherdiv4sf;
27844 case IX86_BUILTIN_GATHERDIV8SF:
27845 icode = CODE_FOR_avx2_gatherdiv4sf256;
27847 case IX86_BUILTIN_GATHERSIV2DI:
27848 icode = CODE_FOR_avx2_gathersiv2di;
27850 case IX86_BUILTIN_GATHERSIV4DI:
27851 icode = CODE_FOR_avx2_gathersiv4di;
27853 case IX86_BUILTIN_GATHERDIV2DI:
27854 icode = CODE_FOR_avx2_gatherdiv2di;
27856 case IX86_BUILTIN_GATHERDIV4DI:
27857 icode = CODE_FOR_avx2_gatherdiv4di;
27859 case IX86_BUILTIN_GATHERSIV4SI:
27860 icode = CODE_FOR_avx2_gathersiv4si;
27862 case IX86_BUILTIN_GATHERSIV8SI:
27863 icode = CODE_FOR_avx2_gathersiv8si;
27865 case IX86_BUILTIN_GATHERDIV4SI:
27866 icode = CODE_FOR_avx2_gatherdiv4si;
27868 case IX86_BUILTIN_GATHERDIV8SI:
27869 icode = CODE_FOR_avx2_gatherdiv4si256;
27872 arg0 = CALL_EXPR_ARG (exp, 0);
27873 arg1 = CALL_EXPR_ARG (exp, 1);
27874 arg2 = CALL_EXPR_ARG (exp, 2);
27875 arg3 = CALL_EXPR_ARG (exp, 3);
27876 arg4 = CALL_EXPR_ARG (exp, 4);
27877 op0 = expand_normal (arg0);
27878 op1 = expand_normal (arg1);
27879 op2 = expand_normal (arg2);
27880 op3 = expand_normal (arg3);
27881 op4 = expand_normal (arg4);
27882 /* Note the arg order is different from the operand order. */
27883 mode0 = insn_data[icode].operand[1].mode;
27884 mode1 = insn_data[icode].operand[2].mode;
27885 mode2 = insn_data[icode].operand[3].mode;
27886 mode3 = insn_data[icode].operand[4].mode;
27887 mode4 = insn_data[icode].operand[5].mode;
27889 if (target == NULL_RTX)
27890 target = gen_reg_rtx (insn_data[icode].operand[0].mode);
27892 /* Force the memory operand to use only a base register here.  But we
27893    don't want to do this to memory operands of other builtin
27894    functions. */
27895 if (GET_MODE (op1) != Pmode)
27896 op1 = convert_to_mode (Pmode, op1, 1);
27897 op1 = force_reg (Pmode, op1);
27898 op1 = gen_rtx_MEM (mode1, op1);
27900 if (!insn_data[icode].operand[1].predicate (op0, mode0))
27901 op0 = copy_to_mode_reg (mode0, op0);
27902 if (!insn_data[icode].operand[2].predicate (op1, mode1))
27903 op1 = copy_to_mode_reg (mode1, op1);
27904 if (!insn_data[icode].operand[3].predicate (op2, mode2))
27905 op2 = copy_to_mode_reg (mode2, op2);
27906 if (!insn_data[icode].operand[4].predicate (op3, mode3))
27907 op3 = copy_to_mode_reg (mode3, op3);
27908 if (!insn_data[icode].operand[5].predicate (op4, mode4))
27910 error ("the last argument must be scale 1, 2, 4, 8");
27913 pat = GEN_FCN (icode) (target, op0, op1, op2, op3, op4);
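/* A minimal user-level sketch of an AVX2 gather builtin expanded above
   (assumes <immintrin.h>; not compiled as part of this file).  The last
   argument is the scale checked against operand 5.  */
#if 0
#include <immintrin.h>

__m256d
gather4 (const double *base, __m128i idx)
{
  return _mm256_i32gather_pd (base, idx, 8);  /* scale must be 1, 2, 4 or 8 */
}
#endif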
27923 for (i = 0, d = bdesc_special_args;
27924 i < ARRAY_SIZE (bdesc_special_args);
27926 if (d->code == fcode)
27927 return ix86_expand_special_args_builtin (d, exp, target);
27929 for (i = 0, d = bdesc_args;
27930 i < ARRAY_SIZE (bdesc_args);
27932 if (d->code == fcode)
27935 case IX86_BUILTIN_FABSQ:
27936 case IX86_BUILTIN_COPYSIGNQ:
27938 /* Emit a normal call if SSE2 isn't available. */
27939 return expand_call (exp, target, ignore);
27941 return ix86_expand_args_builtin (d, exp, target);
27944 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27945 if (d->code == fcode)
27946 return ix86_expand_sse_comi (d, exp, target);
27948 for (i = 0, d = bdesc_pcmpestr;
27949 i < ARRAY_SIZE (bdesc_pcmpestr);
27951 if (d->code == fcode)
27952 return ix86_expand_sse_pcmpestr (d, exp, target);
27954 for (i = 0, d = bdesc_pcmpistr;
27955 i < ARRAY_SIZE (bdesc_pcmpistr);
27957 if (d->code == fcode)
27958 return ix86_expand_sse_pcmpistr (d, exp, target);
27960 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27961 if (d->code == fcode)
27962 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27963 (enum ix86_builtin_func_type)
27964 d->flag, d->comparison);
27966 gcc_unreachable ();
27969 /* Returns a function decl for a vectorized version of the builtin function
27970 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27971 if it is not available. */
27974 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27977 enum machine_mode in_mode, out_mode;
27979 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27981 if (TREE_CODE (type_out) != VECTOR_TYPE
27982 || TREE_CODE (type_in) != VECTOR_TYPE
27983 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27986 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27987 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27988 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27989 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27993 case BUILT_IN_SQRT:
27994 if (out_mode == DFmode && in_mode == DFmode)
27996 if (out_n == 2 && in_n == 2)
27997 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27998 else if (out_n == 4 && in_n == 4)
27999 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
28003 case BUILT_IN_SQRTF:
28004 if (out_mode == SFmode && in_mode == SFmode)
28006 if (out_n == 4 && in_n == 4)
28007 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
28008 else if (out_n == 8 && in_n == 8)
28009 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
28013 case BUILT_IN_LRINT:
28014 if (out_mode == SImode && out_n == 4
28015 && in_mode == DFmode && in_n == 2)
28016 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
28019 case BUILT_IN_LRINTF:
28020 if (out_mode == SImode && in_mode == SFmode)
28022 if (out_n == 4 && in_n == 4)
28023 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
28024 else if (out_n == 8 && in_n == 8)
28025 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
28029 case BUILT_IN_COPYSIGN:
28030 if (out_mode == DFmode && in_mode == DFmode)
28032 if (out_n == 2 && in_n == 2)
28033 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
28034 else if (out_n == 4 && in_n == 4)
28035 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
28039 case BUILT_IN_COPYSIGNF:
28040 if (out_mode == SFmode && in_mode == SFmode)
28042 if (out_n == 4 && in_n == 4)
28043 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
28044 else if (out_n == 8 && in_n == 8)
28045 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
28049 case BUILT_IN_FLOOR:
28050 /* The round insn does not trap on denormals. */
28051 if (flag_trapping_math || !TARGET_ROUND)
28054 if (out_mode == DFmode && in_mode == DFmode)
28056 if (out_n == 2 && in_n == 2)
28057 return ix86_builtins[IX86_BUILTIN_FLOORPD];
28058 else if (out_n == 4 && in_n == 4)
28059 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
28063 case BUILT_IN_FLOORF:
28064 /* The round insn does not trap on denormals. */
28065 if (flag_trapping_math || !TARGET_ROUND)
28068 if (out_mode == SFmode && in_mode == SFmode)
28070 if (out_n == 4 && in_n == 4)
28071 return ix86_builtins[IX86_BUILTIN_FLOORPS];
28072 else if (out_n == 8 && in_n == 8)
28073 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
28077 case BUILT_IN_CEIL:
28078 /* The round insn does not trap on denormals. */
28079 if (flag_trapping_math || !TARGET_ROUND)
28082 if (out_mode == DFmode && in_mode == DFmode)
28084 if (out_n == 2 && in_n == 2)
28085 return ix86_builtins[IX86_BUILTIN_CEILPD];
28086 else if (out_n == 4 && in_n == 4)
28087 return ix86_builtins[IX86_BUILTIN_CEILPD256];
28091 case BUILT_IN_CEILF:
28092 /* The round insn does not trap on denormals. */
28093 if (flag_trapping_math || !TARGET_ROUND)
28096 if (out_mode == SFmode && in_mode == SFmode)
28098 if (out_n == 4 && in_n == 4)
28099 return ix86_builtins[IX86_BUILTIN_CEILPS];
28100 else if (out_n == 8 && in_n == 8)
28101 return ix86_builtins[IX86_BUILTIN_CEILPS256];
28105 case BUILT_IN_TRUNC:
28106 /* The round insn does not trap on denormals. */
28107 if (flag_trapping_math || !TARGET_ROUND)
28110 if (out_mode == DFmode && in_mode == DFmode)
28112 if (out_n == 2 && in_n == 2)
28113 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
28114 else if (out_n == 4 && in_n == 4)
28115 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
28119 case BUILT_IN_TRUNCF:
28120 /* The round insn does not trap on denormals. */
28121 if (flag_trapping_math || !TARGET_ROUND)
28124 if (out_mode == SFmode && in_mode == SFmode)
28126 if (out_n == 4 && in_n == 4)
28127 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
28128 else if (out_n == 8 && in_n == 8)
28129 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
28133 case BUILT_IN_RINT:
28134 /* The round insn does not trap on denormals. */
28135 if (flag_trapping_math || !TARGET_ROUND)
28138 if (out_mode == DFmode && in_mode == DFmode)
28140 if (out_n == 2 && in_n == 2)
28141 return ix86_builtins[IX86_BUILTIN_RINTPD];
28142 else if (out_n == 4 && in_n == 4)
28143 return ix86_builtins[IX86_BUILTIN_RINTPD256];
28147 case BUILT_IN_RINTF:
28148 /* The round insn does not trap on denormals. */
28149 if (flag_trapping_math || !TARGET_ROUND)
28152 if (out_mode == SFmode && in_mode == SFmode)
28154 if (out_n == 4 && in_n == 4)
28155 return ix86_builtins[IX86_BUILTIN_RINTPS];
28156 else if (out_n == 8 && in_n == 8)
28157 return ix86_builtins[IX86_BUILTIN_RINTPS256];
28161 case BUILT_IN_ROUND:
28162 /* The round insn does not trap on denormals. */
28163 if (flag_trapping_math || !TARGET_ROUND)
28166 if (out_mode == DFmode && in_mode == DFmode)
28168 if (out_n == 2 && in_n == 2)
28169 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
28170 else if (out_n == 4 && in_n == 4)
28171 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
28175 case BUILT_IN_ROUNDF:
28176 /* The round insn does not trap on denormals. */
28177 if (flag_trapping_math || !TARGET_ROUND)
28180 if (out_mode == SFmode && in_mode == SFmode)
28182 if (out_n == 4 && in_n == 4)
28183 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
28184 else if (out_n == 8 && in_n == 8)
28185 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
28189 case BUILT_IN_FMA:
28190 if (out_mode == DFmode && in_mode == DFmode)
28192 if (out_n == 2 && in_n == 2)
28193 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
28194 if (out_n == 4 && in_n == 4)
28195 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
28199 case BUILT_IN_FMAF:
28200 if (out_mode == SFmode && in_mode == SFmode)
28202 if (out_n == 4 && in_n == 4)
28203 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
28204 if (out_n == 8 && in_n == 8)
28205 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
28213 /* Dispatch to a handler for a vectorization library. */
28214 if (ix86_veclib_handler)
28215 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
28216 type_in);
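/* A minimal sketch of a loop that exercises this hook (illustrative
   only; not compiled as part of this file).  With e.g. -O3 -mavx
   -ffast-math the vectorizer queries BUILT_IN_SQRT here and obtains
   IX86_BUILTIN_SQRTPD256, processing four doubles per iteration.  */
#if 0
#include <math.h>

void
vec_sqrt (double *restrict out, const double *restrict in, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = sqrt (in[i]);
}
#endif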
28221 /* Handler for an SVML-style interface to
28222 a library with vectorized intrinsics. */
28225 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
28228 tree fntype, new_fndecl, args;
28231 enum machine_mode el_mode, in_mode;
28234 /* The SVML is suitable only for unsafe math. */
28235 if (!flag_unsafe_math_optimizations)
28238 el_mode = TYPE_MODE (TREE_TYPE (type_out));
28239 n = TYPE_VECTOR_SUBPARTS (type_out);
28240 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28241 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28242 if (el_mode != in_mode
28250 case BUILT_IN_LOG10:
28252 case BUILT_IN_TANH:
28254 case BUILT_IN_ATAN:
28255 case BUILT_IN_ATAN2:
28256 case BUILT_IN_ATANH:
28257 case BUILT_IN_CBRT:
28258 case BUILT_IN_SINH:
28260 case BUILT_IN_ASINH:
28261 case BUILT_IN_ASIN:
28262 case BUILT_IN_COSH:
28264 case BUILT_IN_ACOSH:
28265 case BUILT_IN_ACOS:
28266 if (el_mode != DFmode || n != 2)
28270 case BUILT_IN_EXPF:
28271 case BUILT_IN_LOGF:
28272 case BUILT_IN_LOG10F:
28273 case BUILT_IN_POWF:
28274 case BUILT_IN_TANHF:
28275 case BUILT_IN_TANF:
28276 case BUILT_IN_ATANF:
28277 case BUILT_IN_ATAN2F:
28278 case BUILT_IN_ATANHF:
28279 case BUILT_IN_CBRTF:
28280 case BUILT_IN_SINHF:
28281 case BUILT_IN_SINF:
28282 case BUILT_IN_ASINHF:
28283 case BUILT_IN_ASINF:
28284 case BUILT_IN_COSHF:
28285 case BUILT_IN_COSF:
28286 case BUILT_IN_ACOSHF:
28287 case BUILT_IN_ACOSF:
28288 if (el_mode != SFmode || n != 4)
28296 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
28298 if (fn == BUILT_IN_LOGF)
28299 strcpy (name, "vmlsLn4");
28300 else if (fn == BUILT_IN_LOG)
28301 strcpy (name, "vmldLn2");
28304 sprintf (name, "vmls%s", bname+10);
28305 name[strlen (name)-1] = '4';
28308 sprintf (name, "vmld%s2", bname+10);
28310 /* Convert the first letter of the function name to uppercase. */
28311 name[4] &= ~0x20;
28314 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
28315 args = TREE_CHAIN (args))
28319 fntype = build_function_type_list (type_out, type_in, NULL);
28321 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28323 /* Build a function declaration for the vectorized function. */
28324 new_fndecl = build_decl (BUILTINS_LOCATION,
28325 FUNCTION_DECL, get_identifier (name), fntype);
28326 TREE_PUBLIC (new_fndecl) = 1;
28327 DECL_EXTERNAL (new_fndecl) = 1;
28328 DECL_IS_NOVOPS (new_fndecl) = 1;
28329 TREE_READONLY (new_fndecl) = 1;
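/* A standalone sketch of the SVML mangling above (hypothetical helper,
   illustrative only; assumes the builtin name arrives as
   "__builtin_sinf" etc., hence the "bname+10").  For example,
   svml_name ("__builtin_sinf", 4) gives "vmlsSin4" and
   svml_name ("__builtin_sin", 2) gives "vmldSin2".  */
#if 0
#include <stdio.h>
#include <string.h>

static const char *
svml_name (const char *bname, int n)
{
  static char name[20];
  if (n == 4)
    {
      sprintf (name, "vmls%s", bname + 10);	/* skip "__builtin_" */
      name[strlen (name) - 1] = '4';		/* "f" suffix -> lane count */
    }
  else
    sprintf (name, "vmld%s2", bname + 10);
  name[4] &= ~0x20;				/* uppercase first letter */
  return name;
}
#endif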
28334 /* Handler for an ACML-style interface to
28335 a library with vectorized intrinsics. */
28338 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
28340 char name[20] = "__vr.._";
28341 tree fntype, new_fndecl, args;
28344 enum machine_mode el_mode, in_mode;
28347 /* The ACML is 64-bit only, and is suitable only for unsafe math, as
28348    it does not correctly support parts of IEEE (such as denormals)
28349    with the required precision. */
28350 if (!TARGET_64BIT
28351 || !flag_unsafe_math_optimizations)
28354 el_mode = TYPE_MODE (TREE_TYPE (type_out));
28355 n = TYPE_VECTOR_SUBPARTS (type_out);
28356 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28357 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28358 if (el_mode != in_mode
28368 case BUILT_IN_LOG2:
28369 case BUILT_IN_LOG10:
28372 if (el_mode != DFmode
28377 case BUILT_IN_SINF:
28378 case BUILT_IN_COSF:
28379 case BUILT_IN_EXPF:
28380 case BUILT_IN_POWF:
28381 case BUILT_IN_LOGF:
28382 case BUILT_IN_LOG2F:
28383 case BUILT_IN_LOG10F:
28386 if (el_mode != SFmode
28395 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
28396 sprintf (name + 7, "%s", bname+10);
28399 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
28400 args = TREE_CHAIN (args))
28404 fntype = build_function_type_list (type_out, type_in, NULL);
28406 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28408 /* Build a function declaration for the vectorized function. */
28409 new_fndecl = build_decl (BUILTINS_LOCATION,
28410 FUNCTION_DECL, get_identifier (name), fntype);
28411 TREE_PUBLIC (new_fndecl) = 1;
28412 DECL_EXTERNAL (new_fndecl) = 1;
28413 DECL_IS_NOVOPS (new_fndecl) = 1;
28414 TREE_READONLY (new_fndecl) = 1;
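/* A standalone sketch of the ACML mangling above (hypothetical helper,
   illustrative only).  The "__vr.._" template is patched in place:
   acml_name ("__builtin_sin", 0) gives "__vrd2_sin" and
   acml_name ("__builtin_sinf", 1) gives "__vrs4_sinf".  */
#if 0
#include <stdio.h>

static const char *
acml_name (const char *bname, int single)
{
  static char name[20] = "__vr.._";
  name[4] = single ? 's' : 'd';		/* element type */
  name[5] = single ? '4' : '2';		/* lane count */
  sprintf (name + 7, "%s", bname + 10);	/* skip "__builtin_" */
  return name;
}
#endif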
28420 /* Returns a decl of a function that implements conversion of an integer vector
28421 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
28422 are the types involved when converting according to CODE.
28423 Return NULL_TREE if it is not available. */
28426 ix86_vectorize_builtin_conversion (unsigned int code,
28427 tree dest_type, tree src_type)
28435 switch (TYPE_MODE (src_type))
28438 switch (TYPE_MODE (dest_type))
28441 return (TYPE_UNSIGNED (src_type)
28442 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
28443 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
28445 return (TYPE_UNSIGNED (src_type)
28447 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
28453 switch (TYPE_MODE (dest_type))
28456 return (TYPE_UNSIGNED (src_type)
28458 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
28467 case FIX_TRUNC_EXPR:
28468 switch (TYPE_MODE (dest_type))
28471 switch (TYPE_MODE (src_type))
28474 return (TYPE_UNSIGNED (dest_type)
28476 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
28478 return (TYPE_UNSIGNED (dest_type)
28480 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
28487 switch (TYPE_MODE (src_type))
28490 return (TYPE_UNSIGNED (dest_type)
28492 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
28509 /* Returns a code for a target-specific builtin that implements the
28510    reciprocal of the function, or NULL_TREE if not available. */
28513 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
28514 bool sqrt ATTRIBUTE_UNUSED)
28516 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
28517 && flag_finite_math_only && !flag_trapping_math
28518 && flag_unsafe_math_optimizations))
28522 /* Machine dependent builtins. */
28525 /* Vectorized version of sqrt to rsqrt conversion. */
28526 case IX86_BUILTIN_SQRTPS_NR:
28527 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
28529 case IX86_BUILTIN_SQRTPS_NR256:
28530 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
28536 /* Normal builtins. */
28539 /* Sqrt to rsqrt conversion. */
28540 case BUILT_IN_SQRTF:
28541 return ix86_builtins[IX86_BUILTIN_RSQRTF];
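/* A minimal sketch of why this substitution needs unsafe math
   (illustrative only): the ~12-bit rsqrtps estimate is refined with one
   Newton-Raphson step, x1 = x0 * (1.5 - 0.5 * a * x0 * x0), which is
   what the _NR ("Newton-Raphson") builtins expand to.  */
#if 0
static float
refined_rsqrt (float a, float x0)	/* x0 = hardware rsqrt estimate */
{
  return x0 * (1.5f - 0.5f * a * x0 * x0);
}
#endif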
28548 /* Helper for avx_vpermilps256_operand et al. This is also used by
28549 the expansion functions to turn the parallel back into a mask.
28550 The return value is 0 for no match and the imm8+1 for a match. */
28553 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
28555 unsigned i, nelt = GET_MODE_NUNITS (mode);
28557 unsigned char ipar[8];
28559 if (XVECLEN (par, 0) != (int) nelt)
28562 /* Validate that all of the elements are constants, and not totally
28563 out of range. Copy the data into an integral array to make the
28564 subsequent checks easier. */
28565 for (i = 0; i < nelt; ++i)
28567 rtx er = XVECEXP (par, 0, i);
28568 unsigned HOST_WIDE_INT ei;
28570 if (!CONST_INT_P (er))
28581 /* In the 256-bit DFmode case, we can only move elements within
28582    a 128-bit lane. */
28583 for (i = 0; i < 2; ++i)
28587 mask |= ipar[i] << i;
28589 for (i = 2; i < 4; ++i)
28593 mask |= (ipar[i] - 2) << i;
28598 /* In the 256-bit SFmode case, we have full freedom of movement
28599 within the low 128-bit lane, but the high 128-bit lane must
28600 mirror the exact same pattern. */
28601 for (i = 0; i < 4; ++i)
28602 if (ipar[i] + 4 != ipar[i + 4])
28609 /* In the 128-bit case, we have full freedom in the placement of
28610 the elements from the source operand. */
28611 for (i = 0; i < nelt; ++i)
28612 mask |= ipar[i] << (i * (nelt / 2));
28616 gcc_unreachable ();
28619 /* Make sure success has a non-zero value by adding one. */
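/* Worked example of the encoding above: for V4SFmode the parallel
   (2 3 0 1) yields mask = 2<<0 | 3<<2 | 0<<4 | 1<<6 = 0x4e, so the
   function returns 0x4f, i.e. the imm8 0x4e plus one.  */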
28623 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
28624 the expansion functions to turn the parallel back into a mask.
28625 The return value is 0 for no match and the imm8+1 for a match. */
28628 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
28630 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
28632 unsigned char ipar[8];
28634 if (XVECLEN (par, 0) != (int) nelt)
28637 /* Validate that all of the elements are constants, and not totally
28638 out of range. Copy the data into an integral array to make the
28639 subsequent checks easier. */
28640 for (i = 0; i < nelt; ++i)
28642 rtx er = XVECEXP (par, 0, i);
28643 unsigned HOST_WIDE_INT ei;
28645 if (!CONST_INT_P (er))
28648 if (ei >= 2 * nelt)
28653 /* Validate that each half of the permute selects consecutive elements, i.e. that the halves really are contiguous halves. */
28654 for (i = 0; i < nelt2 - 1; ++i)
28655 if (ipar[i] + 1 != ipar[i + 1])
28657 for (i = nelt2; i < nelt - 1; ++i)
28658 if (ipar[i] + 1 != ipar[i + 1])
28661 /* Reconstruct the mask. */
28662 for (i = 0; i < 2; ++i)
28664 unsigned e = ipar[i * nelt2];
28668 mask |= e << (i * 4);
28671 /* Make sure success has a non-zero value by adding one. */
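/* Worked example: for V8SFmode (nelt2 = 4) the parallel
   (0 1 2 3 8 9 10 11) has halves starting at elements 0 and 8, so
   e = 0/4 = 0 and e = 8/4 = 2, giving mask = 0 | 2<<4 = 0x20 and a
   return value of 0x21.  */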
28676 /* Store OPERAND to memory after reload has completed. This means
28677 that we can't easily use assign_stack_local. */
28679 ix86_force_to_memory (enum machine_mode mode, rtx operand)
28683 gcc_assert (reload_completed);
28684 if (ix86_using_red_zone ())
28686 result = gen_rtx_MEM (mode,
28687 gen_rtx_PLUS (Pmode,
28689 GEN_INT (-RED_ZONE_SIZE)));
28690 emit_move_insn (result, operand);
28692 else if (TARGET_64BIT)
28698 operand = gen_lowpart (DImode, operand);
28702 gen_rtx_SET (VOIDmode,
28703 gen_rtx_MEM (DImode,
28704 gen_rtx_PRE_DEC (DImode,
28705 stack_pointer_rtx)),
28709 gcc_unreachable ();
28711 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28720 split_double_mode (mode, &operand, 1, operands, operands + 1);
28722 gen_rtx_SET (VOIDmode,
28723 gen_rtx_MEM (SImode,
28724 gen_rtx_PRE_DEC (Pmode,
28725 stack_pointer_rtx)),
28728 gen_rtx_SET (VOIDmode,
28729 gen_rtx_MEM (SImode,
28730 gen_rtx_PRE_DEC (Pmode,
28731 stack_pointer_rtx)),
28736 /* Store HImode values as SImode. */
28737 operand = gen_lowpart (SImode, operand);
28741 gen_rtx_SET (VOIDmode,
28742 gen_rtx_MEM (GET_MODE (operand),
28743 gen_rtx_PRE_DEC (SImode,
28744 stack_pointer_rtx)),
28748 gcc_unreachable ();
28750 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28755 /* Free the operand from memory. */
28757 ix86_free_from_memory (enum machine_mode mode)
28759 if (!ix86_using_red_zone ())
28763 if (mode == DImode || TARGET_64BIT)
28767 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28768 to a pop or add instruction if registers are available. */
28769 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28770 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28775 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28777 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28778 QImode must go into class Q_REGS.
28779 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28780 movdf to do mem-to-mem moves through integer regs. */
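/* An illustrative case of the narrowing below: loading the
   CONST_DOUBLE 1.0 into FLOAT_SSE_REGS with SSE math disabled falls
   through to the CONST_DOUBLE checks, and since 1.0 is a standard
   80387 constant (fld1), the class is narrowed to its x87 subset.  */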
28783 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28785 enum machine_mode mode = GET_MODE (x);
28787 /* We're only allowed to return a subclass of CLASS. Many of the
28788 following checks fail for NO_REGS, so eliminate that early. */
28789 if (regclass == NO_REGS)
28792 /* All classes can load zeros. */
28793 if (x == CONST0_RTX (mode))
28796 /* Force constants into memory if we are loading a (nonzero) constant into
28797 an MMX or SSE register. This is because there are no MMX/SSE instructions
28798 to load from a constant. */
28800 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28803 /* Prefer SSE regs only, if we can use them for math. */
28804 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28805 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28807 /* Floating-point constants need more complex checks. */
28808 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28810 /* General regs can load everything. */
28811 if (reg_class_subset_p (regclass, GENERAL_REGS))
28814 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28815 zero above. We only want to wind up preferring 80387 registers if
28816 we plan on doing computation with them. */
28818 && standard_80387_constant_p (x) > 0)
28820 /* Limit class to non-SSE registers. */
28821 if (regclass == FLOAT_SSE_REGS)
28823 if (regclass == FP_TOP_SSE_REGS)
28825 if (regclass == FP_SECOND_SSE_REGS)
28826 return FP_SECOND_REG;
28827 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28834 /* Generally when we see PLUS here, it's the function invariant
28835 (plus soft-fp const_int), which can only be computed into general regs. */
28837 if (GET_CODE (x) == PLUS)
28838 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28840 /* QImode constants are easy to load, but non-constant QImode data
28841 must go into Q_REGS. */
28842 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28844 if (reg_class_subset_p (regclass, Q_REGS))
28846 if (reg_class_subset_p (Q_REGS, regclass))
28854 /* Discourage putting floating-point values in SSE registers unless
28855 SSE math is being used, and likewise for the 387 registers. */
28857 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28859 enum machine_mode mode = GET_MODE (x);
28861 /* Restrict the output reload class to the register bank that we are doing
28862 math on. If we would like not to return a subset of CLASS, reject this
28863 alternative: if reload cannot do this, it will still use its choice. */
28865 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28866 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28868 if (X87_FLOAT_MODE_P (mode))
28870 if (regclass == FP_TOP_SSE_REGS)
28872 else if (regclass == FP_SECOND_SSE_REGS)
28873 return FP_SECOND_REG;
28875 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28882 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28883 enum machine_mode mode, secondary_reload_info *sri)
28885 /* Double-word spills from general registers to non-offsettable memory
28886 references (zero-extended addresses) require special handling. */
28889 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
28890 && rclass == GENERAL_REGS
28891 && !offsettable_memref_p (x))
28894 ? CODE_FOR_reload_noff_load
28895 : CODE_FOR_reload_noff_store);
28896 /* Add the cost of moving address to a temporary. */
28897 sri->extra_cost = 1;
28902 /* QImode spills from non-QI registers require
28903 an intermediate register on 32-bit targets. */
28905 && !in_p && mode == QImode
28906 && (rclass == GENERAL_REGS
28907 || rclass == LEGACY_REGS
28908 || rclass == INDEX_REGS))
28917 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28918 regno = true_regnum (x);
28920 /* Return Q_REGS if the operand is in memory. */
28925 /* This condition handles the corner case where an expression involving
28926 pointers gets vectorized. We're trying to use the address of a
28927 stack slot as a vector initializer.
28929 (set (reg:V2DI 74 [ vect_cst_.2 ])
28930 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28932 Eventually frame gets turned into sp+offset like this:
28934 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28935 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28936 (const_int 392 [0x188]))))
28938 That later gets turned into:
28940 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28941 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28942 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28944 We'll have the following reload recorded:
28946 Reload 0: reload_in (DI) =
28947 (plus:DI (reg/f:DI 7 sp)
28948 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28949 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28950 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28951 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28952 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28953 reload_reg_rtx: (reg:V2DI 22 xmm1)
28955 This isn't going to work since SSE instructions can't handle scalar
28956 additions. Returning GENERAL_REGS forces the addition into an integer
28957 register, and reload can handle subsequent reloads without problems. */
28959 if (in_p && GET_CODE (x) == PLUS
28960 && SSE_CLASS_P (rclass)
28961 && SCALAR_INT_MODE_P (mode))
28962 return GENERAL_REGS;
28967 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28970 ix86_class_likely_spilled_p (reg_class_t rclass)
28981 case SSE_FIRST_REG:
28983 case FP_SECOND_REG:
28993 /* If we are copying between general and FP registers, we need a memory
28994 location. The same is true for SSE and MMX registers.
28996 To keep register_move_cost fast, an inline variant is provided.
28998 The macro can't work reliably when one of the CLASSES is a class containing
28999 registers from multiple units (SSE, MMX, integer). We avoid this by never
29000 combining those units in a single alternative in the machine description.
29001 Ensure that this constraint holds to avoid unexpected surprises.
29003 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
29004 enforce these sanity checks. */
29007 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
29008 enum machine_mode mode, int strict)
29010 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
29011 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
29012 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
29013 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
29014 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
29015 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
29017 gcc_assert (!strict);
29021 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
29024 /* ??? This is a lie. We do have moves between mmx/general and
29025 mmx/sse2 registers. But by saying we need secondary memory we discourage the
29026 register allocator from using the mmx registers unless needed. */
29027 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
29030 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
29032 /* SSE1 doesn't have any direct moves from other classes. */
29036 /* If the target says that inter-unit moves are more expensive
29037 than moving through memory, then don't generate them. */
29038 if (!TARGET_INTER_UNIT_MOVES)
29041 /* Between SSE and general, we have moves no larger than word size. */
29042 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
29050 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
29051 enum machine_mode mode, int strict)
29053 return inline_secondary_memory_needed (class1, class2, mode, strict);
29056 /* Implement the TARGET_CLASS_MAX_NREGS hook.
29058 On the 80386, this is the size of MODE in words,
29059 except in the FP regs, where a single reg is always enough. */
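/* For instance, on 32-bit targets DImode in GENERAL_REGS needs
   (8 + 3) / 4 = 2 registers and XFmode needs 3, while any scalar
   float mode kept in FLOAT_REGS occupies a single x87 register and
   a complex mode two.  */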
29061 static unsigned char
29062 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
29064 if (MAYBE_INTEGER_CLASS_P (rclass))
29066 if (mode == XFmode)
29067 return (TARGET_64BIT ? 2 : 3);
29068 else if (mode == XCmode)
29069 return (TARGET_64BIT ? 4 : 6);
29071 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
29075 if (COMPLEX_MODE_P (mode))
29082 /* Return true if the registers in CLASS cannot represent the change from
29083 modes FROM to TO. */
29086 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
29087 enum reg_class regclass)
29092 /* x87 registers can't do subreg at all, as all values are reformatted
29093 to extended precision. */
29094 if (MAYBE_FLOAT_CLASS_P (regclass))
29097 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
29099 /* Vector registers do not support QI or HImode loads. If we don't
29100 disallow a change to these modes, reload will assume it's ok to
29101 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
29102 the vec_dupv4hi pattern. */
29103 if (GET_MODE_SIZE (from) < 4)
29106 /* Vector registers do not support subreg with nonzero offsets, which
29107 are otherwise valid for integer registers. Since we can't see
29108 whether we have a nonzero offset from here, prohibit all
29109 nonparadoxical subregs changing size. */
29110 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
29117 /* Return the cost of moving data of mode M between a
29118 register and memory. A value of 2 is the default; this cost is
29119 relative to those in `REGISTER_MOVE_COST'.
29121 This function is used extensively by register_move_cost, which is used to
29122 build tables at startup; make it inline in that case.
29123 When IN is 2, return the maximum of the in and out move costs.
29125 If moving between registers and memory is more expensive than
29126 between two registers, you should define this macro to express the relative cost.
29129 Also model the increased cost of moving QImode registers in non-Q_REGS classes. */
29133 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
29137 if (FLOAT_CLASS_P (regclass))
29155 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
29156 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
29158 if (SSE_CLASS_P (regclass))
29161 switch (GET_MODE_SIZE (mode))
29176 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
29177 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
29179 if (MMX_CLASS_P (regclass))
29182 switch (GET_MODE_SIZE (mode))
29194 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
29195 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
29197 switch (GET_MODE_SIZE (mode))
29200 if (Q_CLASS_P (regclass) || TARGET_64BIT)
29203 return ix86_cost->int_store[0];
29204 if (TARGET_PARTIAL_REG_DEPENDENCY
29205 && optimize_function_for_speed_p (cfun))
29206 cost = ix86_cost->movzbl_load;
29208 cost = ix86_cost->int_load[0];
29210 return MAX (cost, ix86_cost->int_store[0]);
29216 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
29218 return ix86_cost->movzbl_load;
29220 return ix86_cost->int_store[0] + 4;
29225 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
29226 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
29228 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
29229 if (mode == TFmode)
29232 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
29234 cost = ix86_cost->int_load[2];
29236 cost = ix86_cost->int_store[2];
29237 return (cost * (((int) GET_MODE_SIZE (mode)
29238 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
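/* E.g. on a 32-bit target with the default 12-byte XFmode, a TFmode
   value takes (12 + 3) / 4 = 3 word-sized moves, so the cost above is
   tripled.  */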
29243 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
29246 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
29250 /* Return the cost of moving data from a register in class CLASS1 to
29251 one in class CLASS2.
29253 It is not required that the cost always equal 2 when FROM is the same as TO;
29254 on some machines it is expensive to move between registers if they are not
29255 general registers. */
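/* A sketch of the secondary-memory case handled first below: moving
   e.g. DImode between GENERAL_REGS and SSE_REGS without direct
   inter-unit moves is priced as a store from CLASS1 plus a load into
   CLASS2, plus a penalty of 20 when CLASS1 needs more registers for
   MODE than CLASS2 (multiple stores feeding one wide load stall).  */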
29258 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
29259 reg_class_t class2_i)
29261 enum reg_class class1 = (enum reg_class) class1_i;
29262 enum reg_class class2 = (enum reg_class) class2_i;
29264 /* In case we require secondary memory, compute the cost of the store followed
29265 by the load. In order to avoid bad register allocation choices, we need
29266 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
29268 if (inline_secondary_memory_needed (class1, class2, mode, 0))
29272 cost += inline_memory_move_cost (mode, class1, 2);
29273 cost += inline_memory_move_cost (mode, class2, 2);
29275 /* When copying from a general purpose register we may emit multiple
29276 stores followed by a single load, causing a memory-size-mismatch stall.
29277 Count this as an arbitrarily high cost of 20. */
29278 if (targetm.class_max_nregs (class1, mode)
29279 > targetm.class_max_nregs (class2, mode))
29282 /* In the case of FP/MMX moves, the registers actually overlap, and we
29283 have to switch modes in order to treat them differently. */
29284 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
29285 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
29291 /* Moves between SSE/MMX and integer unit are expensive. */
29292 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
29293 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
29295 /* ??? By keeping the returned value relatively high, we limit the number
29296 of moves between integer and MMX/SSE registers for all targets.
29297 Additionally, a high value prevents problems with x86_modes_tieable_p(),
29298 where integer modes in MMX/SSE registers are not tieable
29299 because of missing QImode and HImode moves to, from or between
29300 MMX/SSE registers. */
29301 return MAX (8, ix86_cost->mmxsse_to_integer);
29303 if (MAYBE_FLOAT_CLASS_P (class1))
29304 return ix86_cost->fp_move;
29305 if (MAYBE_SSE_CLASS_P (class1))
29306 return ix86_cost->sse_move;
29307 if (MAYBE_MMX_CLASS_P (class1))
29308 return ix86_cost->mmx_move;
29312 /* Return TRUE if hard register REGNO can hold a value of machine mode MODE. */
29316 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
29318 /* Flags, and only flags, can hold CCmode values. */
29319 if (CC_REGNO_P (regno))
29320 return GET_MODE_CLASS (mode) == MODE_CC;
29321 if (GET_MODE_CLASS (mode) == MODE_CC
29322 || GET_MODE_CLASS (mode) == MODE_RANDOM
29323 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
29325 if (FP_REGNO_P (regno))
29326 return VALID_FP_MODE_P (mode);
29327 if (SSE_REGNO_P (regno))
29329 /* We implement the move patterns for all vector modes into and
29330 out of SSE registers, even when no operation instructions
29331 are available. OImode move is available only when AVX is enabled. */
29333 return ((TARGET_AVX && mode == OImode)
29334 || VALID_AVX256_REG_MODE (mode)
29335 || VALID_SSE_REG_MODE (mode)
29336 || VALID_SSE2_REG_MODE (mode)
29337 || VALID_MMX_REG_MODE (mode)
29338 || VALID_MMX_REG_MODE_3DNOW (mode));
29340 if (MMX_REGNO_P (regno))
29342 /* We implement the move patterns for 3DNOW modes even in MMX mode,
29343 so if the register is available at all, then we can move data of
29344 the given mode into or out of it. */
29345 return (VALID_MMX_REG_MODE (mode)
29346 || VALID_MMX_REG_MODE_3DNOW (mode));
29349 if (mode == QImode)
29351 /* Take care with QImode values - they can be in non-QI regs,
29352 but then they cause partial register stalls. */
29353 if (regno <= BX_REG || TARGET_64BIT)
29355 if (!TARGET_PARTIAL_REG_STALL)
29357 return !can_create_pseudo_p ();
29359 /* We handle both integer and float values in the general purpose registers. */
29360 else if (VALID_INT_MODE_P (mode))
29362 else if (VALID_FP_MODE_P (mode))
29364 else if (VALID_DFP_MODE_P (mode))
29366 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
29367 on to use that value in smaller contexts, this can easily force a
29368 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
29369 supporting DImode, allow it. */
29370 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
29376 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
29377 tieable integer mode. */
29380 ix86_tieable_integer_mode_p (enum machine_mode mode)
29389 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
29392 return TARGET_64BIT;
29399 /* Return true if MODE1 is accessible in a register that can hold MODE2
29400 without copying. That is, all register classes that can hold MODE2
29401 can also hold MODE1. */
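/* For example, (SFmode, XFmode) is a tieable pair: every class that
   can hold XFmode (x87 stack or general regs) can also hold SFmode,
   which is what the XFmode check below encodes; TFmode deliberately
   has no such tie.  */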
29404 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
29406 if (mode1 == mode2)
29409 if (ix86_tieable_integer_mode_p (mode1)
29410 && ix86_tieable_integer_mode_p (mode2))
29413 /* MODE2 being XFmode implies fp stack or general regs, which means we
29414 can tie any smaller floating point modes to it. Note that we do not
29415 tie this with TFmode. */
29416 if (mode2 == XFmode)
29417 return mode1 == SFmode || mode1 == DFmode;
29419 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
29420 that we can tie it with SFmode. */
29421 if (mode2 == DFmode)
29422 return mode1 == SFmode;
29424 /* If MODE2 is only appropriate for an SSE register, then tie with
29425 any other mode acceptable to SSE registers. */
29426 if (GET_MODE_SIZE (mode2) == 16
29427 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
29428 return (GET_MODE_SIZE (mode1) == 16
29429 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
29431 /* If MODE2 is appropriate for an MMX register, then tie
29432 with any other mode acceptable to MMX registers. */
29433 if (GET_MODE_SIZE (mode2) == 8
29434 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
29435 return (GET_MODE_SIZE (mode1) == 8
29436 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
29441 /* Compute a (partial) cost for rtx X. Return true if the complete
29442 cost has been computed, and false if subexpressions should be
29443 scanned. In either case, *TOTAL contains the cost result. */
29446 ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
29449 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
29450 enum machine_mode mode = GET_MODE (x);
29451 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
29459 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
29461 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
29463 else if (flag_pic && SYMBOLIC_CONST (x)
29465 || (GET_CODE (x) != LABEL_REF
29466 && (GET_CODE (x) != SYMBOL_REF
29467 || !SYMBOL_REF_LOCAL_P (x)))))
29474 if (mode == VOIDmode)
29477 switch (standard_80387_constant_p (x))
29482 default: /* Other constants */
29487 /* Start with (MEM (SYMBOL_REF)), since that's where
29488 it'll probably end up. Add a penalty for size. */
29489 *total = (COSTS_N_INSNS (1)
29490 + (flag_pic != 0 && !TARGET_64BIT)
29491 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
29497 /* Zero extension is often completely free on x86_64, so make
29498 it as cheap as possible. */
29499 if (TARGET_64BIT && mode == DImode
29500 && GET_MODE (XEXP (x, 0)) == SImode)
29502 else if (TARGET_ZERO_EXTEND_WITH_AND)
29503 *total = cost->add;
29505 *total = cost->movzx;
29509 *total = cost->movsx;
29513 if (CONST_INT_P (XEXP (x, 1))
29514 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
29516 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29519 *total = cost->add;
29522 if ((value == 2 || value == 3)
29523 && cost->lea <= cost->shift_const)
29525 *total = cost->lea;
29535 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
29537 if (CONST_INT_P (XEXP (x, 1)))
29539 if (INTVAL (XEXP (x, 1)) > 32)
29540 *total = cost->shift_const + COSTS_N_INSNS (2);
29542 *total = cost->shift_const * 2;
29546 if (GET_CODE (XEXP (x, 1)) == AND)
29547 *total = cost->shift_var * 2;
29549 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
29554 if (CONST_INT_P (XEXP (x, 1)))
29555 *total = cost->shift_const;
29557 *total = cost->shift_var;
29565 gcc_assert (FLOAT_MODE_P (mode));
29566 gcc_assert (TARGET_FMA || TARGET_FMA4);
29568 /* ??? SSE scalar/vector cost should be used here. */
29569 /* ??? Bald assumption that fma has the same cost as fmul. */
29570 *total = cost->fmul;
29571 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
29573 /* A negation in op0 or op2 is free: FMS, FNMA, FNMS. */
29575 if (GET_CODE (sub) == NEG)
29576 sub = XEXP (sub, 0);
29577 *total += rtx_cost (sub, FMA, 0, speed);
29580 if (GET_CODE (sub) == NEG)
29581 sub = XEXP (sub, 0);
29582 *total += rtx_cost (sub, FMA, 2, speed);
29587 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29589 /* ??? SSE scalar cost should be used here. */
29590 *total = cost->fmul;
29593 else if (X87_FLOAT_MODE_P (mode))
29595 *total = cost->fmul;
29598 else if (FLOAT_MODE_P (mode))
29600 /* ??? SSE vector cost should be used here. */
29601 *total = cost->fmul;
29606 rtx op0 = XEXP (x, 0);
29607 rtx op1 = XEXP (x, 1);
29609 if (CONST_INT_P (XEXP (x, 1)))
29611 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
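/* Each iteration of the loop below clears the lowest set bit of
   VALUE (Kernighan's trick), so NBITS ends up as the population count
   of the constant multiplier, which scales the per-bit multiply cost
   in the formula further down.  */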
29612 for (nbits = 0; value != 0; value &= value - 1)
29616 /* This is arbitrary. */
29619 /* Compute costs correctly for widening multiplication. */
29620 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
29621 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
29622 == GET_MODE_SIZE (mode))
29624 int is_mulwiden = 0;
29625 enum machine_mode inner_mode = GET_MODE (op0);
29627 if (GET_CODE (op0) == GET_CODE (op1))
29628 is_mulwiden = 1, op1 = XEXP (op1, 0);
29629 else if (CONST_INT_P (op1))
29631 if (GET_CODE (op0) == SIGN_EXTEND)
29632 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
29635 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
29639 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
29642 *total = (cost->mult_init[MODE_INDEX (mode)]
29643 + nbits * cost->mult_bit
29644 + rtx_cost (op0, outer_code, opno, speed)
29645 + rtx_cost (op1, outer_code, opno, speed));
29654 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29655 /* ??? SSE cost should be used here. */
29656 *total = cost->fdiv;
29657 else if (X87_FLOAT_MODE_P (mode))
29658 *total = cost->fdiv;
29659 else if (FLOAT_MODE_P (mode))
29660 /* ??? SSE vector cost should be used here. */
29661 *total = cost->fdiv;
29663 *total = cost->divide[MODE_INDEX (mode)];
29667 if (GET_MODE_CLASS (mode) == MODE_INT
29668 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
29670 if (GET_CODE (XEXP (x, 0)) == PLUS
29671 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
29672 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
29673 && CONSTANT_P (XEXP (x, 1)))
29675 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
29676 if (val == 2 || val == 4 || val == 8)
29678 *total = cost->lea;
29679 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
29680 outer_code, opno, speed);
29681 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
29682 outer_code, opno, speed);
29683 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
29687 else if (GET_CODE (XEXP (x, 0)) == MULT
29688 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
29690 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
29691 if (val == 2 || val == 4 || val == 8)
29693 *total = cost->lea;
29694 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
29695 outer_code, opno, speed);
29696 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
29700 else if (GET_CODE (XEXP (x, 0)) == PLUS)
29702 *total = cost->lea;
29703 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
29704 outer_code, opno, speed);
29705 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
29706 outer_code, opno, speed);
29707 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
29714 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29716 /* ??? SSE cost should be used here. */
29717 *total = cost->fadd;
29720 else if (X87_FLOAT_MODE_P (mode))
29722 *total = cost->fadd;
29725 else if (FLOAT_MODE_P (mode))
29727 /* ??? SSE vector cost should be used here. */
29728 *total = cost->fadd;
29736 if (!TARGET_64BIT && mode == DImode)
29738 *total = (cost->add * 2
29739 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
29740 << (GET_MODE (XEXP (x, 0)) != DImode))
29741 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
29742 << (GET_MODE (XEXP (x, 1)) != DImode)));
29748 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29750 /* ??? SSE cost should be used here. */
29751 *total = cost->fchs;
29754 else if (X87_FLOAT_MODE_P (mode))
29756 *total = cost->fchs;
29759 else if (FLOAT_MODE_P (mode))
29761 /* ??? SSE vector cost should be used here. */
29762 *total = cost->fchs;
29768 if (!TARGET_64BIT && mode == DImode)
29769 *total = cost->add * 2;
29771 *total = cost->add;
29775 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
29776 && XEXP (XEXP (x, 0), 1) == const1_rtx
29777 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
29778 && XEXP (x, 1) == const0_rtx)
29780 /* This kind of construct is implemented using test[bwl].
29781 Treat it as if we had an AND. */
29782 *total = (cost->add
29783 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
29784 + rtx_cost (const1_rtx, outer_code, opno, speed));
29790 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
29795 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29796 /* ??? SSE cost should be used here. */
29797 *total = cost->fabs;
29798 else if (X87_FLOAT_MODE_P (mode))
29799 *total = cost->fabs;
29800 else if (FLOAT_MODE_P (mode))
29801 /* ??? SSE vector cost should be used here. */
29802 *total = cost->fabs;
29806 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29807 /* ??? SSE cost should be used here. */
29808 *total = cost->fsqrt;
29809 else if (X87_FLOAT_MODE_P (mode))
29810 *total = cost->fsqrt;
29811 else if (FLOAT_MODE_P (mode))
29812 /* ??? SSE vector cost should be used here. */
29813 *total = cost->fsqrt;
29817 if (XINT (x, 1) == UNSPEC_TP)
29824 case VEC_DUPLICATE:
29825 /* ??? Assume all of these vector manipulation patterns are
29826 recognizable, in which case they all pretty much have the same cost. */
29828 *total = COSTS_N_INSNS (1);
29838 static int current_machopic_label_num;
29840 /* Given a symbol name and its associated stub, write out the
29841 definition of the stub. */
29844 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29846 unsigned int length;
29847 char *binder_name, *symbol_name, lazy_ptr_name[32];
29848 int label = ++current_machopic_label_num;
29850 /* For 64-bit we shouldn't get here. */
29851 gcc_assert (!TARGET_64BIT);
29853 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29854 symb = targetm.strip_name_encoding (symb);
29856 length = strlen (stub);
29857 binder_name = XALLOCAVEC (char, length + 32);
29858 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29860 length = strlen (symb);
29861 symbol_name = XALLOCAVEC (char, length + 32);
29862 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29864 sprintf (lazy_ptr_name, "L%d$lz", label);
29866 if (MACHOPIC_ATT_STUB)
29867 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29868 else if (MACHOPIC_PURE)
29869 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29871 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29873 fprintf (file, "%s:\n", stub);
29874 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29876 if (MACHOPIC_ATT_STUB)
29878 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29880 else if (MACHOPIC_PURE)
29883 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29884 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29885 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29886 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
29887 label, lazy_ptr_name, label);
29888 fprintf (file, "\tjmp\t*%%ecx\n");
29891 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29893 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29894 it needs no stub-binding-helper. */
29895 if (MACHOPIC_ATT_STUB)
29898 fprintf (file, "%s:\n", binder_name);
29902 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29903 fprintf (file, "\tpushl\t%%ecx\n");
29906 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29908 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29910 /* N.B. Keep the correspondence of these
29911 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29912 old-pic/new-pic/non-pic stubs; altering this will break
29913 compatibility with existing dylibs. */
29916 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29917 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29920 /* 16-byte -mdynamic-no-pic stub. */
29921 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
29923 fprintf (file, "%s:\n", lazy_ptr_name);
29924 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29925 fprintf (file, ASM_LONG "%s\n", binder_name);
29927 #endif /* TARGET_MACHO */
29929 /* Order the registers for the register allocator. */
29932 x86_order_regs_for_local_alloc (void)
29937 /* First allocate the local general purpose registers. */
29938 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29939 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29940 reg_alloc_order [pos++] = i;
29942 /* Global general purpose registers. */
29943 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29944 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29945 reg_alloc_order [pos++] = i;
29947 /* x87 registers come first in case we are doing FP math using them. */
29949 if (!TARGET_SSE_MATH)
29950 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29951 reg_alloc_order [pos++] = i;
29953 /* SSE registers. */
29954 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29955 reg_alloc_order [pos++] = i;
29956 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29957 reg_alloc_order [pos++] = i;
29959 /* x87 registers. */
29960 if (TARGET_SSE_MATH)
29961 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29962 reg_alloc_order [pos++] = i;
29964 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29965 reg_alloc_order [pos++] = i;
29967 /* Initialize the rest of the array, as we do not allocate some registers at all. */
29969 while (pos < FIRST_PSEUDO_REGISTER)
29970 reg_alloc_order [pos++] = 0;
29973 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29974 in struct attribute_spec.handler. */
29976 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29978 int flags ATTRIBUTE_UNUSED,
29979 bool *no_add_attrs)
29981 if (TREE_CODE (*node) != FUNCTION_TYPE
29982 && TREE_CODE (*node) != METHOD_TYPE
29983 && TREE_CODE (*node) != FIELD_DECL
29984 && TREE_CODE (*node) != TYPE_DECL)
29986 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29988 *no_add_attrs = true;
29993 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29995 *no_add_attrs = true;
29998 if (is_attribute_p ("callee_pop_aggregate_return", name))
30002 cst = TREE_VALUE (args);
30003 if (TREE_CODE (cst) != INTEGER_CST)
30005 warning (OPT_Wattributes,
30006 "%qE attribute requires an integer constant argument",
30008 *no_add_attrs = true;
30010 else if (compare_tree_int (cst, 0) != 0
30011 && compare_tree_int (cst, 1) != 0)
30013 warning (OPT_Wattributes,
30014 "argument to %qE attribute is neither zero, nor one",
30016 *no_add_attrs = true;
30025 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
30026 struct attribute_spec.handler. */
30028 ix86_handle_abi_attribute (tree *node, tree name,
30029 tree args ATTRIBUTE_UNUSED,
30030 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30032 if (TREE_CODE (*node) != FUNCTION_TYPE
30033 && TREE_CODE (*node) != METHOD_TYPE
30034 && TREE_CODE (*node) != FIELD_DECL
30035 && TREE_CODE (*node) != TYPE_DECL)
30037 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30039 *no_add_attrs = true;
30043 /* Can combine regparm with all attributes but fastcall. */
30044 if (is_attribute_p ("ms_abi", name))
30046 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
30048 error ("ms_abi and sysv_abi attributes are not compatible");
30053 else if (is_attribute_p ("sysv_abi", name))
30055 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
30057 error ("ms_abi and sysv_abi attributes are not compatible");
30066 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
30067 struct attribute_spec.handler. */
30069 ix86_handle_struct_attribute (tree *node, tree name,
30070 tree args ATTRIBUTE_UNUSED,
30071 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30074 if (DECL_P (*node))
30076 if (TREE_CODE (*node) == TYPE_DECL)
30077 type = &TREE_TYPE (*node);
30082 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
30083 || TREE_CODE (*type) == UNION_TYPE)))
30085 warning (OPT_Wattributes, "%qE attribute ignored",
30087 *no_add_attrs = true;
30090 else if ((is_attribute_p ("ms_struct", name)
30091 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
30092 || ((is_attribute_p ("gcc_struct", name)
30093 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
30095 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
30097 *no_add_attrs = true;
30104 ix86_handle_fndecl_attribute (tree *node, tree name,
30105 tree args ATTRIBUTE_UNUSED,
30106 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30108 if (TREE_CODE (*node) != FUNCTION_DECL)
30110 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30112 *no_add_attrs = true;
30118 ix86_ms_bitfield_layout_p (const_tree record_type)
30120 return ((TARGET_MS_BITFIELD_LAYOUT
30121 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
30122 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
30125 /* Returns an expression indicating where the this parameter is
30126 located on entry to the FUNCTION. */
30129 x86_this_parameter (tree function)
30131 tree type = TREE_TYPE (function);
30132 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
30137 const int *parm_regs;
30139 if (ix86_function_type_abi (type) == MS_ABI)
30140 parm_regs = x86_64_ms_abi_int_parameter_registers;
30142 parm_regs = x86_64_int_parameter_registers;
30143 return gen_rtx_REG (DImode, parm_regs[aggr]);
30146 nregs = ix86_function_regparm (type, function);
30148 if (nregs > 0 && !stdarg_p (type))
30151 unsigned int ccvt = ix86_get_callcvt (type);
30153 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
30154 regno = aggr ? DX_REG : CX_REG;
30155 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
30159 return gen_rtx_MEM (SImode,
30160 plus_constant (stack_pointer_rtx, 4));
30169 return gen_rtx_MEM (SImode,
30170 plus_constant (stack_pointer_rtx, 4));
30173 return gen_rtx_REG (SImode, regno);
30176 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
30179 /* Determine whether x86_output_mi_thunk can succeed. */
30182 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
30183 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
30184 HOST_WIDE_INT vcall_offset, const_tree function)
30186 /* 64-bit can handle anything. */
30190 /* For 32-bit, everything's fine if we have one free register. */
30191 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
30194 /* Need a free register for vcall_offset. */
30198 /* Need a free register for GOT references. */
30199 if (flag_pic && !targetm.binds_local_p (function))
30202 /* Otherwise ok. */
30206 /* Output the assembler code for a thunk function. THUNK_DECL is the
30207 declaration for the thunk function itself, FUNCTION is the decl for
30208 the target function. DELTA is an immediate constant offset to be
30209 added to THIS. If VCALL_OFFSET is nonzero, the word at
30210 *(*this + vcall_offset) should be added to THIS. */
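/* In pseudo-code, the thunk emitted below is roughly:

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(*this + VCALL_OFFSET);
     goto FUNCTION;

   with THIS living either in its parameter register or in its stack
   slot, depending on the ABI.  */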
30213 x86_output_mi_thunk (FILE *file,
30214 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
30215 HOST_WIDE_INT vcall_offset, tree function)
30217 rtx this_param = x86_this_parameter (function);
30218 rtx this_reg, tmp, fnaddr;
30220 emit_note (NOTE_INSN_PROLOGUE_END);
30222 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
30223 pull it in now and let DELTA benefit. */
30224 if (REG_P (this_param))
30225 this_reg = this_param;
30226 else if (vcall_offset)
30228 /* Put the this parameter into %eax. */
30229 this_reg = gen_rtx_REG (Pmode, AX_REG);
30230 emit_move_insn (this_reg, this_param);
30233 this_reg = NULL_RTX;
30235 /* Adjust the this parameter by a fixed constant. */
30238 rtx delta_rtx = GEN_INT (delta);
30239 rtx delta_dst = this_reg ? this_reg : this_param;
30243 if (!x86_64_general_operand (delta_rtx, Pmode))
30245 tmp = gen_rtx_REG (Pmode, R10_REG);
30246 emit_move_insn (tmp, delta_rtx);
30251 emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx));
30254 /* Adjust the this parameter by a value stored in the vtable. */
30257 rtx vcall_addr, vcall_mem, this_mem;
30258 unsigned int tmp_regno;
30261 tmp_regno = R10_REG;
30264 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
30265 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
30266 tmp_regno = AX_REG;
30268 tmp_regno = CX_REG;
30270 tmp = gen_rtx_REG (Pmode, tmp_regno);
30272 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
30273 if (Pmode != ptr_mode)
30274 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
30275 emit_move_insn (tmp, this_mem);
30277 /* Adjust the this parameter. */
30278 vcall_addr = plus_constant (tmp, vcall_offset);
30280 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
30282 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
30283 emit_move_insn (tmp2, GEN_INT (vcall_offset));
30284 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
30287 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
30288 if (Pmode != ptr_mode)
30289 emit_insn (gen_addsi_1_zext (this_reg,
30290 gen_rtx_REG (ptr_mode,
30294 emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem));
30297 /* If necessary, drop THIS back to its stack slot. */
30298 if (this_reg && this_reg != this_param)
30299 emit_move_insn (this_param, this_reg);
30301 fnaddr = XEXP (DECL_RTL (function), 0);
30304 if (!flag_pic || targetm.binds_local_p (function)
30305 || cfun->machine->call_abi == MS_ABI)
30309 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
30310 tmp = gen_rtx_CONST (Pmode, tmp);
30311 fnaddr = gen_rtx_MEM (Pmode, tmp);
30316 if (!flag_pic || targetm.binds_local_p (function))
30319 else if (TARGET_MACHO)
30321 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
30322 fnaddr = XEXP (fnaddr, 0);
30324 #endif /* TARGET_MACHO */
30327 tmp = gen_rtx_REG (Pmode, CX_REG);
30328 output_set_got (tmp, NULL_RTX);
30330 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
30331 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
30332 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
30336 /* Our sibling call patterns do not allow memories, because we have no
30337 predicate that can distinguish between frame and non-frame memory.
30338 For our purposes here, we can get away with (ab)using a jump pattern,
30339 because we're going to do no optimization. */
30340 if (MEM_P (fnaddr))
30341 emit_jump_insn (gen_indirect_jump (fnaddr));
30344 tmp = gen_rtx_MEM (QImode, fnaddr);
30345 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
30346 tmp = emit_call_insn (tmp);
30347 SIBLING_CALL_P (tmp) = 1;
30351 /* Emit just enough of rest_of_compilation to get the insns emitted.
30352 Note that use_thunk calls assemble_start_function et al. */
30353 tmp = get_insns ();
30354 insn_locators_alloc ();
30355 shorten_branches (tmp);
30356 final_start_function (tmp, file, 1);
30357 final (tmp, file, 1);
30358 final_end_function ();
30362 x86_file_start (void)
30364 default_file_start ();
30366 darwin_file_start ();
30368 if (X86_FILE_START_VERSION_DIRECTIVE)
30369 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
30370 if (X86_FILE_START_FLTUSED)
30371 fputs ("\t.global\t__fltused\n", asm_out_file);
30372 if (ix86_asm_dialect == ASM_INTEL)
30373 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
30377 x86_field_alignment (tree field, int computed)
30379 enum machine_mode mode;
30380 tree type = TREE_TYPE (field);
30382 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
30384 mode = TYPE_MODE (strip_array_types (type));
30385 if (mode == DFmode || mode == DCmode
30386 || GET_MODE_CLASS (mode) == MODE_INT
30387 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
30388 return MIN (32, computed);
30392 /* Output assembler code to FILE to increment profiler label # LABELNO
30393 for profiling a function entry. */
30395 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
30397 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
30402 #ifndef NO_PROFILE_COUNTERS
30403 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
30406 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
30407 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
30409 fprintf (file, "\tcall\t%s\n", mcount_name);
30413 #ifndef NO_PROFILE_COUNTERS
30414 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
30417 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
30421 #ifndef NO_PROFILE_COUNTERS
30422 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
30425 fprintf (file, "\tcall\t%s\n", mcount_name);
30429 /* We don't have exact information about the insn sizes, but we may assume
30430 quite safely that we are informed about all 1 byte insns and memory
30431 address sizes. This is enough to eliminate unnecessary padding in most cases. */
30435 min_insn_size (rtx insn)
30439 if (!INSN_P (insn) || !active_insn_p (insn))
30442 /* Discard the alignments we've emitted, and jump table data. */
30443 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
30444 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
30446 if (JUMP_TABLE_DATA_P (insn))
30449 /* Important case - calls are always 5 bytes.
30450 It is common to have many calls in a row. */
30452 && symbolic_reference_mentioned_p (PATTERN (insn))
30453 && !SIBLING_CALL_P (insn))
30455 len = get_attr_length (insn);
30459 /* For normal instructions we rely on get_attr_length being exact,
30460 with a few exceptions. */
30461 if (!JUMP_P (insn))
30463 enum attr_type type = get_attr_type (insn);
30468 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
30469 || asm_noperands (PATTERN (insn)) >= 0)
30476 /* Otherwise trust get_attr_length. */
30480 l = get_attr_length_address (insn);
30481 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
30490 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30492 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte window. */
30496 ix86_avoid_jump_mispredicts (void)
30498 rtx insn, start = get_insns ();
30499 int nbytes = 0, njumps = 0;
30502 /* Look for all minimal intervals of instructions containing 4 jumps.
30503 The intervals are bounded by START and INSN. NBYTES is the total
30504 size of instructions in the interval including INSN and not including
30505 START. When NBYTES is smaller than 16, it is possible
30506 that the ends of START and INSN land in the same 16-byte page.
30508 The smallest page offset at which INSN can start is the case where
30509 START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
30510 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
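/* Concrete sketch: if INSN is a jump and the minimal interval holding
   it plus the three preceding jumps spans NBYTES = 12 bytes, with INSN
   itself 2 bytes, we request padsize = 15 - 12 + 2 = 5 below, so INSN
   cannot share a 16-byte page with the other three jumps.  */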
30512 for (insn = start; insn; insn = NEXT_INSN (insn))
30516 if (LABEL_P (insn))
30518 int align = label_to_alignment (insn);
30519 int max_skip = label_to_max_skip (insn);
30523 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
30524 already in the current 16 byte page, because otherwise
30525 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
30526 bytes to reach 16 byte boundary. */
30528 || (align <= 3 && max_skip != (1 << align) - 1))
30531 fprintf (dump_file, "Label %i with max_skip %i\n",
30532 INSN_UID (insn), max_skip);
30535 while (nbytes + max_skip >= 16)
30537 start = NEXT_INSN (start);
30538 if ((JUMP_P (start)
30539 && GET_CODE (PATTERN (start)) != ADDR_VEC
30540 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30542 njumps--, isjump = 1;
30545 nbytes -= min_insn_size (start);
30551 min_size = min_insn_size (insn);
30552 nbytes += min_size;
30554 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
30555 INSN_UID (insn), min_size);
30557 && GET_CODE (PATTERN (insn)) != ADDR_VEC
30558 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
30566 start = NEXT_INSN (start);
30567 if ((JUMP_P (start)
30568 && GET_CODE (PATTERN (start)) != ADDR_VEC
30569 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30571 njumps--, isjump = 1;
30574 nbytes -= min_insn_size (start);
30576 gcc_assert (njumps >= 0);
30578 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
30579 INSN_UID (start), INSN_UID (insn), nbytes);
30581 if (njumps == 3 && isjump && nbytes < 16)
30583 int padsize = 15 - nbytes + min_insn_size (insn);
30586 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
30587 INSN_UID (insn), padsize);
30588 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
30594 /* AMD Athlon works faster
30595 when RET is not the destination of a conditional jump or directly preceded
30596 by another jump instruction. We avoid the penalty by inserting a NOP just
30597 before the RET instruction in such cases. */
30599 ix86_pad_returns (void)
30604 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30606 basic_block bb = e->src;
30607 rtx ret = BB_END (bb);
30609 bool replace = false;
30611 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
30612 || optimize_bb_for_size_p (bb))
30614 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
30615 if (active_insn_p (prev) || LABEL_P (prev))
30617 if (prev && LABEL_P (prev))
30622 FOR_EACH_EDGE (e, ei, bb->preds)
30623 if (EDGE_FREQUENCY (e) && e->src->index >= 0
30624 && !(e->flags & EDGE_FALLTHRU))
30629 prev = prev_active_insn (ret);
30631 && ((JUMP_P (prev) && any_condjump_p (prev))
30634 /* Empty functions get a branch mispredict even when
30635 the jump destination is not visible to us. */
30636 if (!prev && !optimize_function_for_size_p (cfun))
30641 emit_jump_insn_before (gen_return_internal_long (), ret);
30647 /* Count the minimum number of instructions in BB. Return 4 if the
30648 number of instructions >= 4. */
30651 ix86_count_insn_bb (basic_block bb)
30654 int insn_count = 0;
30656 /* Count number of instructions in this block. Return 4 if the number
30657 of instructions >= 4. */
30658 FOR_BB_INSNS (bb, insn)
30660 /* This only happens in exit blocks. */
30662 && GET_CODE (PATTERN (insn)) == RETURN)
30665 if (NONDEBUG_INSN_P (insn)
30666 && GET_CODE (PATTERN (insn)) != USE
30667 && GET_CODE (PATTERN (insn)) != CLOBBER)
30670 if (insn_count >= 4)
30679 /* Count the minimum number of instructions in code path in BB.
30680 Return 4 if the number of instructions >= 4. */
30683 ix86_count_insn (basic_block bb)
30687 int min_prev_count;
30689 /* Only bother counting instructions along paths with no
30690 more than 2 basic blocks between entry and exit. Given
30691 that BB has an edge to exit, determine if a predecessor
30692 of BB has an edge from entry. If so, compute the number
30693 of instructions in the predecessor block. If there
30694 happen to be multiple such blocks, compute the minimum. */
30695 min_prev_count = 4;
30696 FOR_EACH_EDGE (e, ei, bb->preds)
30699 edge_iterator prev_ei;
30701 if (e->src == ENTRY_BLOCK_PTR)
30703 min_prev_count = 0;
30706 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
30708 if (prev_e->src == ENTRY_BLOCK_PTR)
30710 int count = ix86_count_insn_bb (e->src);
30711 if (count < min_prev_count)
30712 min_prev_count = count;
30718 if (min_prev_count < 4)
30719 min_prev_count += ix86_count_insn_bb (bb);
30721 return min_prev_count;
30724 /* Pad short function to 4 instructions. */
30727 ix86_pad_short_function (void)
30732 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30734 rtx ret = BB_END (e->src);
30735 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
30737 int insn_count = ix86_count_insn (e->src);
30739 /* Pad short function. */
30740 if (insn_count < 4)
30744 /* Find epilogue. */
30747 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
30748 insn = PREV_INSN (insn);
30753 /* Two NOPs count as one instruction. */
30754 insn_count = 2 * (4 - insn_count);
30755 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
30761 /* Implement machine specific optimizations. We implement padding of returns
30762 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
30766 /* We are freeing block_for_insn in the toplev to keep compatibility
30767 with old MDEP_REORGS that are not CFG based. Recompute it now. */
30768 compute_bb_for_insn ();
30770 /* Run the vzeroupper optimization if needed. */
30771 if (TARGET_VZEROUPPER)
30772 move_or_delete_vzeroupper ();
30774 if (optimize && optimize_function_for_speed_p (cfun))
30776 if (TARGET_PAD_SHORT_FUNCTION)
30777 ix86_pad_short_function ();
30778 else if (TARGET_PAD_RETURNS)
30779 ix86_pad_returns ();
30780 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30781 if (TARGET_FOUR_JUMP_LIMIT)
30782 ix86_avoid_jump_mispredicts ();
30787 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
30790 x86_extended_QIreg_mentioned_p (rtx insn)
30793 extract_insn_cached (insn);
30794 for (i = 0; i < recog_data.n_operands; i++)
30795 if (REG_P (recog_data.operand[i])
30796 && REGNO (recog_data.operand[i]) > BX_REG)
30801 /* Return nonzero when P points to a register encoded via a REX prefix.
30802 Called via for_each_rtx. */
30804 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30806 unsigned int regno;
30809 regno = REGNO (*p);
30810 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30813 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
30816 x86_extended_reg_mentioned_p (rtx insn)
30818 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30819 extended_reg_mentioned_1, NULL);
30822 /* If profitable, negate (without causing overflow) integer constant
30823 of mode MODE at location LOC. Return true in this case. */
30825 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30829 if (!CONST_INT_P (*loc))
30835 /* DImode x86_64 constants must fit in 32 bits. */
30836 gcc_assert (x86_64_immediate_operand (*loc, mode));
30847 gcc_unreachable ();
30850 /* Avoid overflows. */
30851 if (mode_signbit_p (mode, *loc))
30854 val = INTVAL (*loc);
30856 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
30857 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
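/* E.g. val = -4 is negated so that `addl $-4,%eax' becomes
   `subl $4,%eax', while val = -128 is left alone: negating it to +128
   would lose the sign-extended imm8 encoding and grow the insn.  */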
30858 if ((val < 0 && val != -128)
30861 *loc = GEN_INT (-val);
30868 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30869 optabs would emit if we didn't have TFmode patterns. */
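/* The expansion below is the classic unsigned-to-float trick: inputs
   with the sign bit clear are converted directly; otherwise we halve
   the value as (in >> 1) | (in & 1) - keeping the low bit as a sticky
   rounding bit - convert that as a signed value, and double the result
   with f0 + f0.  */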
30872 x86_emit_floatuns (rtx operands[2])
30874 rtx neglab, donelab, i0, i1, f0, in, out;
30875 enum machine_mode mode, inmode;
30877 inmode = GET_MODE (operands[1]);
30878 gcc_assert (inmode == SImode || inmode == DImode);
30881 in = force_reg (inmode, operands[1]);
30882 mode = GET_MODE (out);
30883 neglab = gen_label_rtx ();
30884 donelab = gen_label_rtx ();
30885 f0 = gen_reg_rtx (mode);
30887 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30889 expand_float (out, in, 0);
30891 emit_jump_insn (gen_jump (donelab));
30894 emit_label (neglab);
30896 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30898 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30900 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30902 expand_float (f0, i0, 0);
30904 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30906 emit_label (donelab);
30909 /* AVX does not support 32-byte integer vector operations,
30910 thus the longest vector we are faced with is V16QImode. */
30911 #define MAX_VECT_LEN 16
30913 struct expand_vec_perm_d
30915 rtx target, op0, op1;
30916 unsigned char perm[MAX_VECT_LEN];
30917 enum machine_mode vmode;
30918 unsigned char nelt;
30922 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30923 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30925 /* Get a vector mode of the same size as the original but with elements
30926 twice as wide. This is only guaranteed to apply to integral vectors. */
30928 static inline enum machine_mode
30929 get_mode_wider_vector (enum machine_mode o)
30931 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30932 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30933 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30934 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30938 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30939 with all elements equal to VAR. Return true if successful. */
30942 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30943 rtx target, rtx val)
30966 /* First attempt to recognize VAL as-is. */
30967 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30968 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30969 if (recog_memoized (insn) < 0)
30972 /* If that fails, force VAL into a register. */
30975 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30976 seq = get_insns ();
30979 emit_insn_before (seq, insn);
30981 ok = recog_memoized (insn) >= 0;
30990 if (TARGET_SSE || TARGET_3DNOW_A)
30994 val = gen_lowpart (SImode, val);
30995 x = gen_rtx_TRUNCATE (HImode, val);
30996 x = gen_rtx_VEC_DUPLICATE (mode, x);
30997 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31010 struct expand_vec_perm_d dperm;
31014 memset (&dperm, 0, sizeof (dperm));
31015 dperm.target = target;
31016 dperm.vmode = mode;
31017 dperm.nelt = GET_MODE_NUNITS (mode);
31018 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
31020 /* Extend to SImode using a paradoxical SUBREG. */
31021 tmp1 = gen_reg_rtx (SImode);
31022 emit_move_insn (tmp1, gen_lowpart (SImode, val));
31024 /* Insert the SImode value as low element of a V4SImode vector. */
31025 tmp2 = gen_lowpart (V4SImode, dperm.op0);
31026 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
31028 ok = (expand_vec_perm_1 (&dperm)
31029 || expand_vec_perm_broadcast_1 (&dperm));
31041 /* Replicate the value once into the next wider mode and recurse. */
31043 enum machine_mode smode, wsmode, wvmode;
31046 smode = GET_MODE_INNER (mode);
31047 wvmode = get_mode_wider_vector (mode);
31048 wsmode = GET_MODE_INNER (wvmode);
31050 val = convert_modes (wsmode, smode, val, true);
31051 x = expand_simple_binop (wsmode, ASHIFT, val,
31052 GEN_INT (GET_MODE_BITSIZE (smode)),
31053 NULL_RTX, 1, OPTAB_LIB_WIDEN);
31054 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
31056 x = gen_lowpart (wvmode, target);
31057 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
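      /* Worked example (illustrative): to broadcast the QImode value
         0xAB into V16QImode, build the HImode value
         (0xAB << 8) | 0xAB = 0xABAB and broadcast it into V8HImode;
         reinterpreted as V16QImode, the result has 0xAB in every
         byte.  */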
31065 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
31066 rtx x = gen_reg_rtx (hvmode);
31068 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
31071 x = gen_rtx_VEC_CONCAT (mode, x, x);
31072 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31081 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
31082 whose ONE_VAR element is VAR, and other elements are zero. Return true
31086 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
31087 rtx target, rtx var, int one_var)
31089 enum machine_mode vsimode;
31092 bool use_vector_set = false;
31097 /* For SSE4.1, we normally use vector set. But if the second
31098 element is zero and inter-unit moves are OK, we use movq
31100 use_vector_set = (TARGET_64BIT
31102 && !(TARGET_INTER_UNIT_MOVES
31108 use_vector_set = TARGET_SSE4_1;
31111 use_vector_set = TARGET_SSE2;
31114 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
31121 use_vector_set = TARGET_AVX;
31124 /* Use ix86_expand_vector_set in 64bit mode only. */
31125 use_vector_set = TARGET_AVX && TARGET_64BIT;
31131 if (use_vector_set)
31133 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
31134 var = force_reg (GET_MODE_INNER (mode), var);
31135 ix86_expand_vector_set (mmx_ok, target, var, one_var);
31151 var = force_reg (GET_MODE_INNER (mode), var);
31152 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
31153 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31158 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
31159 new_target = gen_reg_rtx (mode);
31161 new_target = target;
31162 var = force_reg (GET_MODE_INNER (mode), var);
31163 x = gen_rtx_VEC_DUPLICATE (mode, var);
31164 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
31165 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
31168 /* We need to shuffle the value to the correct position, so
31169 create a new pseudo to store the intermediate result. */
31171 /* With SSE2, we can use the integer shuffle insns. */
31172 if (mode != V4SFmode && TARGET_SSE2)
31174 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
31176 GEN_INT (one_var == 1 ? 0 : 1),
31177 GEN_INT (one_var == 2 ? 0 : 1),
31178 GEN_INT (one_var == 3 ? 0 : 1)));
31179 if (target != new_target)
31180 emit_move_insn (target, new_target);
31184 /* Otherwise convert the intermediate result to V4SFmode and
31185 use the SSE1 shuffle instructions. */
31186 if (mode != V4SFmode)
31188 tmp = gen_reg_rtx (V4SFmode);
31189 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
31194 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
31196 GEN_INT (one_var == 1 ? 0 : 1),
31197 GEN_INT (one_var == 2 ? 0+4 : 1+4),
31198 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
31200 if (mode != V4SFmode)
31201 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
31202 else if (tmp != target)
31203 emit_move_insn (target, tmp);
31205 else if (target != new_target)
31206 emit_move_insn (target, new_target);
31211 vsimode = V4SImode;
31217 vsimode = V2SImode;
31223 /* Zero extend the variable element to SImode and recurse. */
31224 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
31226 x = gen_reg_rtx (vsimode);
31227 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
31229 gcc_unreachable ();
31231 emit_move_insn (target, gen_lowpart (mode, x));
31239 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
31240 consisting of the values in VALS. It is known that all elements
31241 except ONE_VAR are constants. Return true if successful. */
31244 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
31245 rtx target, rtx vals, int one_var)
31247 rtx var = XVECEXP (vals, 0, one_var);
31248 enum machine_mode wmode;
31251 const_vec = copy_rtx (vals);
31252 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
31253 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
31261 /* For the two element vectors, it's just as easy to use
31262 the general case. */
31266 /* Use ix86_expand_vector_set in 64bit mode only. */
31289 /* There's no way to set one QImode entry easily. Combine
31290 the variable value with its adjacent constant value, and
31291 promote to an HImode set. */
31292 x = XVECEXP (vals, 0, one_var ^ 1);
31295 var = convert_modes (HImode, QImode, var, true);
31296 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
31297 NULL_RTX, 1, OPTAB_LIB_WIDEN);
31298 x = GEN_INT (INTVAL (x) & 0xff);
31302 var = convert_modes (HImode, QImode, var, true);
31303 x = gen_int_mode (INTVAL (x) << 8, HImode);
31305 if (x != const0_rtx)
31306 var = expand_simple_binop (HImode, IOR, var, x, var,
31307 1, OPTAB_LIB_WIDEN);
31309 x = gen_reg_rtx (wmode);
31310 emit_move_insn (x, gen_lowpart (wmode, const_vec));
31311 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
31313 emit_move_insn (target, gen_lowpart (mode, x));
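      /* Worked example (illustrative): for V8QImode with only element 3
         variable, element 2 the constant 0x12 and VAR = 0xAB, the pair
         is merged into the HImode value (0xAB << 8) | 0x12 and inserted
         as element 1 (= 3 >> 1) of the corresponding HImode vector,
         since there is no cheap single-byte insert.  */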
31320 emit_move_insn (target, const_vec);
31321 ix86_expand_vector_set (mmx_ok, target, var, one_var);
31325 /* A subroutine of ix86_expand_vector_init_general. Use vector
31326 concatenate to handle the most general case: all values variable,
31327 and none identical. */
31330 ix86_expand_vector_init_concat (enum machine_mode mode,
31331 rtx target, rtx *ops, int n)
31333 enum machine_mode cmode, hmode = VOIDmode;
31334 rtx first[8], second[4];
31374 gcc_unreachable ();
31377 if (!register_operand (ops[1], cmode))
31378 ops[1] = force_reg (cmode, ops[1]);
31379 if (!register_operand (ops[0], cmode))
31380 ops[0] = force_reg (cmode, ops[0]);
31381 emit_insn (gen_rtx_SET (VOIDmode, target,
31382 gen_rtx_VEC_CONCAT (mode, ops[0],
31402 gcc_unreachable ();
31418 gcc_unreachable ();
31423 /* FIXME: We process inputs backward to help RA. PR 36222. */
31426 for (; i > 0; i -= 2, j--)
31428 first[j] = gen_reg_rtx (cmode);
31429 v = gen_rtvec (2, ops[i - 1], ops[i]);
31430 ix86_expand_vector_init (false, first[j],
31431 gen_rtx_PARALLEL (cmode, v));
31437 gcc_assert (hmode != VOIDmode);
31438 for (i = j = 0; i < n; i += 2, j++)
31440 second[j] = gen_reg_rtx (hmode);
31441 ix86_expand_vector_init_concat (hmode, second [j],
31445 ix86_expand_vector_init_concat (mode, target, second, n);
31448 ix86_expand_vector_init_concat (mode, target, first, n);
31452 gcc_unreachable ();
31456 /* A subroutine of ix86_expand_vector_init_general. Use vector
31457 interleave to handle the most general case: all values variable,
31458 and none identical. */
31461 ix86_expand_vector_init_interleave (enum machine_mode mode,
31462 rtx target, rtx *ops, int n)
31464 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
31467 rtx (*gen_load_even) (rtx, rtx, rtx);
31468 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
31469 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
31474 gen_load_even = gen_vec_setv8hi;
31475 gen_interleave_first_low = gen_vec_interleave_lowv4si;
31476 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31477 inner_mode = HImode;
31478 first_imode = V4SImode;
31479 second_imode = V2DImode;
31480 third_imode = VOIDmode;
31483 gen_load_even = gen_vec_setv16qi;
31484 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
31485 gen_interleave_second_low = gen_vec_interleave_lowv4si;
31486 inner_mode = QImode;
31487 first_imode = V8HImode;
31488 second_imode = V4SImode;
31489 third_imode = V2DImode;
31492 gcc_unreachable ();
31495 for (i = 0; i < n; i++)
31497 /* Extend the odd element to SImode using a paradoxical SUBREG. */
31498 op0 = gen_reg_rtx (SImode);
31499 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
31501 /* Insert the SImode value as the low element of a V4SImode vector. */
31502 op1 = gen_reg_rtx (V4SImode);
31503 op0 = gen_rtx_VEC_MERGE (V4SImode,
31504 gen_rtx_VEC_DUPLICATE (V4SImode,
31506 CONST0_RTX (V4SImode),
31508 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
31510 /* Cast the V4SImode vector back to a vector in the original mode. */
31511 op0 = gen_reg_rtx (mode);
31512 emit_move_insn (op0, gen_lowpart (mode, op1));
31514 /* Load even elements into the second position. */
31515 emit_insn (gen_load_even (op0,
31516 force_reg (inner_mode,
31520 /* Cast vector to FIRST_IMODE vector. */
31521 ops[i] = gen_reg_rtx (first_imode);
31522 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
31525 /* Interleave low FIRST_IMODE vectors. */
31526 for (i = j = 0; i < n; i += 2, j++)
31528 op0 = gen_reg_rtx (first_imode);
31529 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
31531 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
31532 ops[j] = gen_reg_rtx (second_imode);
31533 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
31536 /* Interleave low SECOND_IMODE vectors. */
31537 switch (second_imode)
31540 for (i = j = 0; i < n / 2; i += 2, j++)
31542 op0 = gen_reg_rtx (second_imode);
31543 emit_insn (gen_interleave_second_low (op0, ops[i],
31546 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
31548 ops[j] = gen_reg_rtx (third_imode);
31549 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
31551 second_imode = V2DImode;
31552 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31556 op0 = gen_reg_rtx (second_imode);
31557 emit_insn (gen_interleave_second_low (op0, ops[0],
31560 /* Cast the SECOND_IMODE vector back to a vector on original
31562 emit_insn (gen_rtx_SET (VOIDmode, target,
31563 gen_lowpart (mode, op0)));
31567 gcc_unreachable ();
31571 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
31572 all values variable, and none identical. */
31575 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
31576 rtx target, rtx vals)
31578 rtx ops[32], op0, op1;
31579 enum machine_mode half_mode = VOIDmode;
31586 if (!mmx_ok && !TARGET_SSE)
31598 n = GET_MODE_NUNITS (mode);
31599 for (i = 0; i < n; i++)
31600 ops[i] = XVECEXP (vals, 0, i);
31601 ix86_expand_vector_init_concat (mode, target, ops, n);
31605 half_mode = V16QImode;
31609 half_mode = V8HImode;
31613 n = GET_MODE_NUNITS (mode);
31614 for (i = 0; i < n; i++)
31615 ops[i] = XVECEXP (vals, 0, i);
31616 op0 = gen_reg_rtx (half_mode);
31617 op1 = gen_reg_rtx (half_mode);
31618 ix86_expand_vector_init_interleave (half_mode, op0, ops,
31620 ix86_expand_vector_init_interleave (half_mode, op1,
31621 &ops [n >> 1], n >> 2);
31622 emit_insn (gen_rtx_SET (VOIDmode, target,
31623 gen_rtx_VEC_CONCAT (mode, op0, op1)));
31627 if (!TARGET_SSE4_1)
31635 /* Don't use ix86_expand_vector_init_interleave if we can't
31636 move from GPR to SSE register directly. */
31637 if (!TARGET_INTER_UNIT_MOVES)
31640 n = GET_MODE_NUNITS (mode);
31641 for (i = 0; i < n; i++)
31642 ops[i] = XVECEXP (vals, 0, i);
31643 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
31651 gcc_unreachable ();
31655 int i, j, n_elts, n_words, n_elt_per_word;
31656 enum machine_mode inner_mode;
31657 rtx words[4], shift;
31659 inner_mode = GET_MODE_INNER (mode);
31660 n_elts = GET_MODE_NUNITS (mode);
31661 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
31662 n_elt_per_word = n_elts / n_words;
31663 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
31665 for (i = 0; i < n_words; ++i)
31667 rtx word = NULL_RTX;
31669 for (j = 0; j < n_elt_per_word; ++j)
31671 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
31672 elt = convert_modes (word_mode, inner_mode, elt, true);
31678 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
31679 word, 1, OPTAB_LIB_WIDEN);
31680 word = expand_simple_binop (word_mode, IOR, word, elt,
31681 word, 1, OPTAB_LIB_WIDEN);
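          /* Illustrative: for V8HImode on a 32-bit target this packs
             two elements per SImode word, highest element first, i.e.
             word[i] = (e[2*i + 1] << 16) | e[2*i].  */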
31689 emit_move_insn (target, gen_lowpart (mode, words[0]));
31690 else if (n_words == 2)
31692 rtx tmp = gen_reg_rtx (mode);
31693 emit_clobber (tmp);
31694 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
31695 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
31696 emit_move_insn (target, tmp);
31698 else if (n_words == 4)
31700 rtx tmp = gen_reg_rtx (V4SImode);
31701 gcc_assert (word_mode == SImode);
31702 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
31703 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
31704 emit_move_insn (target, gen_lowpart (mode, tmp));
31707 gcc_unreachable ();
31711 /* Initialize vector TARGET via VALS. Suppress the use of MMX
31712 instructions unless MMX_OK is true. */
31715 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
31717 enum machine_mode mode = GET_MODE (target);
31718 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31719 int n_elts = GET_MODE_NUNITS (mode);
31720 int n_var = 0, one_var = -1;
31721 bool all_same = true, all_const_zero = true;
31725 for (i = 0; i < n_elts; ++i)
31727 x = XVECEXP (vals, 0, i);
31728 if (!(CONST_INT_P (x)
31729 || GET_CODE (x) == CONST_DOUBLE
31730 || GET_CODE (x) == CONST_FIXED))
31731 n_var++, one_var = i;
31732 else if (x != CONST0_RTX (inner_mode))
31733 all_const_zero = false;
31734 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
31738 /* Constants are best loaded from the constant pool. */
31741 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
31745 /* If all values are identical, broadcast the value. */
31747 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
31748 XVECEXP (vals, 0, 0)))
31751 /* Values where only one field is non-constant are best loaded from
31752 the pool and overwritten via move later. */
31756 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
31757 XVECEXP (vals, 0, one_var),
31761 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
31765 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
31769 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
31771 enum machine_mode mode = GET_MODE (target);
31772 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31773 enum machine_mode half_mode;
31774 bool use_vec_merge = false;
31776 static rtx (*gen_extract[6][2]) (rtx, rtx)
31778 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
31779 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
31780 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
31781 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
31782 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31783 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31785 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31787 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31788 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31789 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31790 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31791 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31792 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31802 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31803 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31805 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31807 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31808 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31814 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
31818 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31819 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
31821 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31823 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31824 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31831 /* For the two element vectors, we implement a VEC_CONCAT with
31832 the extraction of the other element. */
31834 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31835 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31838 op0 = val, op1 = tmp;
31840 op0 = tmp, op1 = val;
31842 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31843 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31848 use_vec_merge = TARGET_SSE4_1;
31855 use_vec_merge = true;
31859 /* tmp = target = A B C D */
31860 tmp = copy_to_reg (target);
31861 /* target = A A B B */
31862 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31863 /* target = X A B B */
31864 ix86_expand_vector_set (false, target, val, 0);
31865 /* target = A X C D */
31866 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31867 const1_rtx, const0_rtx,
31868 GEN_INT (2+4), GEN_INT (3+4)));
31872 /* tmp = target = A B C D */
31873 tmp = copy_to_reg (target);
31874 /* tmp = X B C D */
31875 ix86_expand_vector_set (false, tmp, val, 0);
31876 /* target = A B X D */
31877 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31878 const0_rtx, const1_rtx,
31879 GEN_INT (0+4), GEN_INT (3+4)));
31883 /* tmp = target = A B C D */
31884 tmp = copy_to_reg (target);
31885 /* tmp = X B C D */
31886 ix86_expand_vector_set (false, tmp, val, 0);
31887 /* target = A B X D */
31888 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31889 const0_rtx, const1_rtx,
31890 GEN_INT (2+4), GEN_INT (0+4)));
31894 gcc_unreachable ();
31899 use_vec_merge = TARGET_SSE4_1;
31903 /* Element 0 handled by vec_merge below. */
31906 use_vec_merge = true;
31912 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31913 store into element 0, then shuffle them back. */
31917 order[0] = GEN_INT (elt);
31918 order[1] = const1_rtx;
31919 order[2] = const2_rtx;
31920 order[3] = GEN_INT (3);
31921 order[elt] = const0_rtx;
31923 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31924 order[1], order[2], order[3]));
31926 ix86_expand_vector_set (false, target, val, 0);
31928 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31929 order[1], order[2], order[3]));
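          /* Illustrative: for ELT == 2 the ORDER array is {2, 1, 0, 3},
             a self-inverse permutation.  The first pshufd brings
             element 2 into slot 0, the recursive call stores VAL into
             element 0, and the second, identical pshufd swaps the two
             elements back into place.  */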
31933 /* For SSE1, we have to reuse the V4SF code. */
31934 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31935 gen_lowpart (SFmode, val), elt);
31940 use_vec_merge = TARGET_SSE2;
31943 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31947 use_vec_merge = TARGET_SSE4_1;
31954 half_mode = V16QImode;
31960 half_mode = V8HImode;
31966 half_mode = V4SImode;
31972 half_mode = V2DImode;
31978 half_mode = V4SFmode;
31984 half_mode = V2DFmode;
31990 /* Compute offset. */
31994 gcc_assert (i <= 1);
31996 /* Extract the half. */
31997 tmp = gen_reg_rtx (half_mode);
31998 emit_insn (gen_extract[j][i] (tmp, target));
32000 /* Put val in tmp at elt. */
32001 ix86_expand_vector_set (false, tmp, val, elt);
32004 emit_insn (gen_insert[j][i] (target, target, tmp));
32013 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
32014 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
32015 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32019 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
32021 emit_move_insn (mem, target);
32023 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
32024 emit_move_insn (tmp, val);
32026 emit_move_insn (target, mem);
32031 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
32033 enum machine_mode mode = GET_MODE (vec);
32034 enum machine_mode inner_mode = GET_MODE_INNER (mode);
32035 bool use_vec_extr = false;
32048 use_vec_extr = true;
32052 use_vec_extr = TARGET_SSE4_1;
32064 tmp = gen_reg_rtx (mode);
32065 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
32066 GEN_INT (elt), GEN_INT (elt),
32067 GEN_INT (elt+4), GEN_INT (elt+4)));
32071 tmp = gen_reg_rtx (mode);
32072 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
32076 gcc_unreachable ();
32079 use_vec_extr = true;
32084 use_vec_extr = TARGET_SSE4_1;
32098 tmp = gen_reg_rtx (mode);
32099 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
32100 GEN_INT (elt), GEN_INT (elt),
32101 GEN_INT (elt), GEN_INT (elt)));
32105 tmp = gen_reg_rtx (mode);
32106 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
32110 gcc_unreachable ();
32113 use_vec_extr = true;
32118 /* For SSE1, we have to reuse the V4SF code. */
32119 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
32120 gen_lowpart (V4SFmode, vec), elt);
32126 use_vec_extr = TARGET_SSE2;
32129 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
32133 use_vec_extr = TARGET_SSE4_1;
32137 /* ??? Could extract the appropriate HImode element and shift. */
32144 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
32145 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
32147 /* Let the rtl optimizers know about the zero extension performed. */
32148 if (inner_mode == QImode || inner_mode == HImode)
32150 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
32151 target = gen_lowpart (SImode, target);
32154 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32158 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
32160 emit_move_insn (mem, vec);
32162 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
32163 emit_move_insn (target, tmp);
32167 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
32168 pattern to reduce; DEST is the destination; IN is the input vector. */
32171 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
32173 rtx tmp1, tmp2, tmp3;
32175 tmp1 = gen_reg_rtx (V4SFmode);
32176 tmp2 = gen_reg_rtx (V4SFmode);
32177 tmp3 = gen_reg_rtx (V4SFmode);
32179 emit_insn (gen_sse_movhlps (tmp1, in, in));
32180 emit_insn (fn (tmp2, tmp1, in));
32182 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
32183 const1_rtx, const1_rtx,
32184 GEN_INT (1+4), GEN_INT (1+4)));
32185 emit_insn (fn (dest, tmp2, tmp3));
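/* Dataflow of the reduction above, shown illustratively for FN = addps
   with IN = [a b c d]:

     tmp1 = movhlps (in, in)                  = [c d c d]
     tmp2 = addps (tmp1, in)                  = [a+c b+d . .]
     tmp3 = shufps (tmp2, tmp2, {1,1,1+4,1+4}) = [b+d b+d b+d b+d]
     dest = addps (tmp2, tmp3)                = [a+b+c+d . . .]

   so element 0 of DEST holds the full horizontal reduction.  */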
32188 /* Target hook for scalar_mode_supported_p. */
32190 ix86_scalar_mode_supported_p (enum machine_mode mode)
32192 if (DECIMAL_FLOAT_MODE_P (mode))
32193 return default_decimal_float_supported_p ();
32194 else if (mode == TFmode)
32197 return default_scalar_mode_supported_p (mode);
32200 /* Implements target hook vector_mode_supported_p. */
32202 ix86_vector_mode_supported_p (enum machine_mode mode)
32204 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
32206 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
32208 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
32210 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
32212 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
32217 /* Target hook for c_mode_for_suffix. */
32218 static enum machine_mode
32219 ix86_c_mode_for_suffix (char suffix)
32229 /* Worker function for TARGET_MD_ASM_CLOBBERS.
32231 We do this in the new i386 backend to maintain source compatibility
32232 with the old cc0-based compiler. */
32235 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
32236 tree inputs ATTRIBUTE_UNUSED,
32239 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
32241 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
32246 /* Implements target vector targetm.asm.encode_section_info. */
32248 static void ATTRIBUTE_UNUSED
32249 ix86_encode_section_info (tree decl, rtx rtl, int first)
32251 default_encode_section_info (decl, rtl, first);
32253 if (TREE_CODE (decl) == VAR_DECL
32254 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
32255 && ix86_in_large_data_p (decl))
32256 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
32259 /* Worker function for REVERSE_CONDITION. */
32262 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
32264 return (mode != CCFPmode && mode != CCFPUmode
32265 ? reverse_condition (code)
32266 : reverse_condition_maybe_unordered (code));
32269 /* Output code to perform an x87 FP register move, from OPERANDS[1]
32273 output_387_reg_move (rtx insn, rtx *operands)
32275 if (REG_P (operands[0]))
32277 if (REG_P (operands[1])
32278 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
32280 if (REGNO (operands[0]) == FIRST_STACK_REG)
32281 return output_387_ffreep (operands, 0);
32282 return "fstp\t%y0";
32284 if (STACK_TOP_P (operands[0]))
32285 return "fld%Z1\t%y1";
32288 else if (MEM_P (operands[0]))
32290 gcc_assert (REG_P (operands[1]));
32291 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
32292 return "fstp%Z0\t%y0";
32295 /* There is no non-popping store to memory for XFmode.
32296 So if we need one, follow the store with a load. */
32297 if (GET_MODE (operands[0]) == XFmode)
32298 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
32300 return "fst%Z0\t%y0";
32307 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
32308 the FP status register is set. */
32311 ix86_emit_fp_unordered_jump (rtx label)
32313 rtx reg = gen_reg_rtx (HImode);
32316 emit_insn (gen_x86_fnstsw_1 (reg));
32318 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
32320 emit_insn (gen_x86_sahf_1 (reg));
32322 temp = gen_rtx_REG (CCmode, FLAGS_REG);
32323 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
32327 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
32329 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
32330 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
32333 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
32334 gen_rtx_LABEL_REF (VOIDmode, label),
32336 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
32338 emit_jump_insn (temp);
32339 predict_jump (REG_BR_PROB_BASE * 10 / 100);
32342 /* Output code to perform a log1p XFmode calculation. */
32344 void ix86_emit_i387_log1p (rtx op0, rtx op1)
32346 rtx label1 = gen_label_rtx ();
32347 rtx label2 = gen_label_rtx ();
32349 rtx tmp = gen_reg_rtx (XFmode);
32350 rtx tmp2 = gen_reg_rtx (XFmode);
32353 emit_insn (gen_absxf2 (tmp, op1));
32354 test = gen_rtx_GE (VOIDmode, tmp,
32355 CONST_DOUBLE_FROM_REAL_VALUE (
32356 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
32358 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
32360 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
32361 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
32362 emit_jump (label2);
32364 emit_label (label1);
32365 emit_move_insn (tmp, CONST1_RTX (XFmode));
32366 emit_insn (gen_addxf3 (tmp, op1, tmp));
32367 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
32368 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
32370 emit_label (label2);
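/* Summary of the algorithm above (illustrative): fyl2xp1 computes
   y * log2(x + 1) but is only specified for |x| < 1 - sqrt(2)/2
   ~= 0.29289..., matching the threshold tested above.  For small |op1|
   we therefore use fyl2xp1 (op1, ln2) directly; otherwise we fall back
   to fyl2x (1.0 + op1, ln2).  The ln2 operand (fldln2) turns the
   base-2 logarithm into the natural one in both branches.  */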
32373 /* Emit x87 code for a round calculation (nearest integer, halfway cases rounded away from zero). */
32374 void ix86_emit_i387_round (rtx op0, rtx op1)
32376 enum machine_mode inmode = GET_MODE (op1);
32377 enum machine_mode outmode = GET_MODE (op0);
32378 rtx e1, e2, res, tmp, tmp1, half;
32379 rtx scratch = gen_reg_rtx (HImode);
32380 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
32381 rtx jump_label = gen_label_rtx ();
32383 rtx (*gen_abs) (rtx, rtx);
32384 rtx (*gen_neg) (rtx, rtx);
32389 gen_abs = gen_abssf2;
32392 gen_abs = gen_absdf2;
32395 gen_abs = gen_absxf2;
32398 gcc_unreachable ();
32404 gen_neg = gen_negsf2;
32407 gen_neg = gen_negdf2;
32410 gen_neg = gen_negxf2;
32413 gen_neg = gen_neghi2;
32416 gen_neg = gen_negsi2;
32419 gen_neg = gen_negdi2;
32422 gcc_unreachable ();
32425 e1 = gen_reg_rtx (inmode);
32426 e2 = gen_reg_rtx (inmode);
32427 res = gen_reg_rtx (outmode);
32429 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
32431 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
32433 /* scratch = fxam(op1) */
32434 emit_insn (gen_rtx_SET (VOIDmode, scratch,
32435 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
32437 /* e1 = fabs(op1) */
32438 emit_insn (gen_abs (e1, op1));
32440 /* e2 = e1 + 0.5 */
32441 half = force_reg (inmode, half);
32442 emit_insn (gen_rtx_SET (VOIDmode, e2,
32443 gen_rtx_PLUS (inmode, e1, half)));
32445 /* res = floor(e2) */
32446 if (inmode != XFmode)
32448 tmp1 = gen_reg_rtx (XFmode);
32450 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
32451 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
32461 rtx tmp0 = gen_reg_rtx (XFmode);
32463 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
32465 emit_insn (gen_rtx_SET (VOIDmode, res,
32466 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
32467 UNSPEC_TRUNC_NOOP)));
32471 emit_insn (gen_frndintxf2_floor (res, tmp1));
32474 emit_insn (gen_lfloorxfhi2 (res, tmp1));
32477 emit_insn (gen_lfloorxfsi2 (res, tmp1));
32480 emit_insn (gen_lfloorxfdi2 (res, tmp1));
32483 gcc_unreachable ();
32486 /* flags = signbit(a) */
32487 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
32489 /* if (flags) then res = -res */
32490 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
32491 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
32492 gen_rtx_LABEL_REF (VOIDmode, jump_label),
32494 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32495 predict_jump (REG_BR_PROB_BASE * 50 / 100);
32496 JUMP_LABEL (insn) = jump_label;
32498 emit_insn (gen_neg (res, res));
32500 emit_label (jump_label);
32501 LABEL_NUSES (jump_label) = 1;
32503 emit_move_insn (op0, res);
32506 /* Output code to perform a Newton-Raphson approximation of a single-precision
32507 floating-point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
32509 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
32511 rtx x0, x1, e0, e1;
32513 x0 = gen_reg_rtx (mode);
32514 e0 = gen_reg_rtx (mode);
32515 e1 = gen_reg_rtx (mode);
32516 x1 = gen_reg_rtx (mode);
32518 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
32520 /* x0 = rcp(b) estimate */
32521 emit_insn (gen_rtx_SET (VOIDmode, x0,
32522 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
32525 emit_insn (gen_rtx_SET (VOIDmode, e0,
32526 gen_rtx_MULT (mode, x0, b)));
32529 emit_insn (gen_rtx_SET (VOIDmode, e0,
32530 gen_rtx_MULT (mode, x0, e0)));
32533 emit_insn (gen_rtx_SET (VOIDmode, e1,
32534 gen_rtx_PLUS (mode, x0, x0)));
32537 emit_insn (gen_rtx_SET (VOIDmode, x1,
32538 gen_rtx_MINUS (mode, e1, e0)));
32541 emit_insn (gen_rtx_SET (VOIDmode, res,
32542 gen_rtx_MULT (mode, a, x1)));
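/* The sequence above is a Newton-Raphson step in disguise; an
   illustrative scalar model (not part of GCC):

     x0 = rcpss (b);                 /* ~12-bit estimate of 1/b */
     x1 = (x0 + x0) - b * x0 * x0;   /* == x0 * (2 - b * x0)    */
     res = a * x1;

   one such step roughly doubles the number of correct bits.  */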
32545 /* Output code to perform a Newton-Raphson approximation of a
32546 single-precision floating-point [reciprocal] square root. */
32548 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
32551 rtx x0, e0, e1, e2, e3, mthree, mhalf;
32554 x0 = gen_reg_rtx (mode);
32555 e0 = gen_reg_rtx (mode);
32556 e1 = gen_reg_rtx (mode);
32557 e2 = gen_reg_rtx (mode);
32558 e3 = gen_reg_rtx (mode);
32560 real_from_integer (&r, VOIDmode, -3, -1, 0);
32561 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
32563 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
32564 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
32566 if (VECTOR_MODE_P (mode))
32568 mthree = ix86_build_const_vector (mode, true, mthree);
32569 mhalf = ix86_build_const_vector (mode, true, mhalf);
32572 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
32573 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
32575 /* x0 = rsqrt(a) estimate */
32576 emit_insn (gen_rtx_SET (VOIDmode, x0,
32577 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
32580 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
32585 zero = gen_reg_rtx (mode);
32586 mask = gen_reg_rtx (mode);
32588 zero = force_reg (mode, CONST0_RTX(mode));
32589 emit_insn (gen_rtx_SET (VOIDmode, mask,
32590 gen_rtx_NE (mode, zero, a)));
32592 emit_insn (gen_rtx_SET (VOIDmode, x0,
32593 gen_rtx_AND (mode, x0, mask)));
32597 emit_insn (gen_rtx_SET (VOIDmode, e0,
32598 gen_rtx_MULT (mode, x0, a)));
32600 emit_insn (gen_rtx_SET (VOIDmode, e1,
32601 gen_rtx_MULT (mode, e0, x0)));
32604 mthree = force_reg (mode, mthree);
32605 emit_insn (gen_rtx_SET (VOIDmode, e2,
32606 gen_rtx_PLUS (mode, e1, mthree)));
32608 mhalf = force_reg (mode, mhalf);
32610 /* e3 = -.5 * x0 */
32611 emit_insn (gen_rtx_SET (VOIDmode, e3,
32612 gen_rtx_MULT (mode, x0, mhalf)));
32614 /* e3 = -.5 * e0 */
32615 emit_insn (gen_rtx_SET (VOIDmode, e3,
32616 gen_rtx_MULT (mode, e0, mhalf)));
32617 /* ret = e2 * e3 */
32618 emit_insn (gen_rtx_SET (VOIDmode, res,
32619 gen_rtx_MULT (mode, e2, e3)));
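/* Derivation of the formula above (illustrative): starting from an
   estimate x0 ~ 1/sqrt(a), one Newton-Raphson step for
   f(x) = 1/(x*x) - a is

     x1 = 0.5 * x0 * (3 - a * x0 * x0) = -0.5 * x0 * (a * x0 * x0 - 3)

   which is the rsqrt branch (E3 = -0.5 * X0).  The sqrt branch instead
   uses E3 = -0.5 * E0 = -0.5 * A * X0, i.e. it multiplies the refined
   reciprocal square root by A, giving a / sqrt(a) = sqrt(a).  */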
32622 #ifdef TARGET_SOLARIS
32623 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
32626 i386_solaris_elf_named_section (const char *name, unsigned int flags,
32629 /* With Binutils 2.15, the "@unwind" marker must be specified on
32630 every occurrence of the ".eh_frame" section, not just the first
32633 && strcmp (name, ".eh_frame") == 0)
32635 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
32636 flags & SECTION_WRITE ? "aw" : "a");
32641 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
32643 solaris_elf_asm_comdat_section (name, flags, decl);
32648 default_elf_asm_named_section (name, flags, decl);
32650 #endif /* TARGET_SOLARIS */
32652 /* Return the mangling of TYPE if it is an extended fundamental type. */
32654 static const char *
32655 ix86_mangle_type (const_tree type)
32657 type = TYPE_MAIN_VARIANT (type);
32659 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32660 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32663 switch (TYPE_MODE (type))
32666 /* __float128 is "g". */
32669 /* "long double" or __float80 is "e". */
32676 /* For 32-bit code we can save PIC register setup by using
32677 __stack_chk_fail_local hidden function instead of calling
32678 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
32679 register, so it is better to call __stack_chk_fail directly. */
32681 static tree ATTRIBUTE_UNUSED
32682 ix86_stack_protect_fail (void)
32684 return TARGET_64BIT
32685 ? default_external_stack_protect_fail ()
32686 : default_hidden_stack_protect_fail ();
32689 /* Select a format to encode pointers in exception handling data. CODE
32690 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
32691 true if the symbol may be affected by dynamic relocations.
32693 ??? All x86 object file formats are capable of representing this.
32694 After all, the relocation needed is the same as for the call insn.
32695 Whether or not a particular assembler allows us to enter such, I
32696 guess we'll have to see. */
32698 asm_preferred_eh_data_format (int code, int global)
32702 int type = DW_EH_PE_sdata8;
32704 || ix86_cmodel == CM_SMALL_PIC
32705 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
32706 type = DW_EH_PE_sdata4;
32707 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
32709 if (ix86_cmodel == CM_SMALL
32710 || (ix86_cmodel == CM_MEDIUM && code))
32711 return DW_EH_PE_udata4;
32712 return DW_EH_PE_absptr;
32715 /* Expand copysign from SIGN to the positive value ABS_VALUE
32716 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
32719 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
32721 enum machine_mode mode = GET_MODE (sign);
32722 rtx sgn = gen_reg_rtx (mode);
32723 if (mask == NULL_RTX)
32725 enum machine_mode vmode;
32727 if (mode == SFmode)
32729 else if (mode == DFmode)
32734 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
32735 if (!VECTOR_MODE_P (mode))
32737 /* We need to generate a scalar mode mask in this case. */
32738 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32739 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32740 mask = gen_reg_rtx (mode);
32741 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32745 mask = gen_rtx_NOT (mode, mask);
32746 emit_insn (gen_rtx_SET (VOIDmode, sgn,
32747 gen_rtx_AND (mode, mask, sign)));
32748 emit_insn (gen_rtx_SET (VOIDmode, result,
32749 gen_rtx_IOR (mode, abs_value, sgn)));
32752 /* Expand fabs (OP0) and return a new rtx that holds the result. The
32753 mask for masking out the sign-bit is stored in *SMASK, if that is
32756 ix86_expand_sse_fabs (rtx op0, rtx *smask)
32758 enum machine_mode vmode, mode = GET_MODE (op0);
32761 xa = gen_reg_rtx (mode);
32762 if (mode == SFmode)
32764 else if (mode == DFmode)
32768 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
32769 if (!VECTOR_MODE_P (mode))
32771 /* We need to generate a scalar mode mask in this case. */
32772 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32773 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32774 mask = gen_reg_rtx (mode);
32775 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32777 emit_insn (gen_rtx_SET (VOIDmode, xa,
32778 gen_rtx_AND (mode, op0, mask)));
32786 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
32787 swapping the operands if SWAP_OPERANDS is true. The expanded
32788 code is a forward jump to a newly created label in case the
32789 comparison is true. The generated label rtx is returned. */
32791 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
32792 bool swap_operands)
32803 label = gen_label_rtx ();
32804 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
32805 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32806 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
32807 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
32808 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
32809 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
32810 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32811 JUMP_LABEL (tmp) = label;
32816 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
32817 using comparison code CODE. Operands are swapped for the comparison if
32818 SWAP_OPERANDS is true. Returns an rtx for the generated mask. */
32820 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
32821 bool swap_operands)
32823 rtx (*insn)(rtx, rtx, rtx, rtx);
32824 enum machine_mode mode = GET_MODE (op0);
32825 rtx mask = gen_reg_rtx (mode);
32834 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
32836 emit_insn (insn (mask, op0, op1,
32837 gen_rtx_fmt_ee (code, mode, op0, op1)));
32841 /* Generate and return a rtx of mode MODE for 2**n where n is the number
32842 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
32844 ix86_gen_TWO52 (enum machine_mode mode)
32846 REAL_VALUE_TYPE TWO52r;
32849 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
32850 TWO52 = const_double_from_real_value (TWO52r, mode);
32851 TWO52 = force_reg (mode, TWO52);
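  /* Illustrative rationale: for 0 <= x < 2**52, the sum x + 2**52 lies
     in [2**52, 2**53), where adjacent doubles are exactly 1.0 apart,
     so the addition rounds x to an integer in the current rounding
     mode and subtracting 2**52 recovers that integer, e.g.

       (2.6 + 0x1p52) - 0x1p52 == 3.0   (round-to-nearest)

     SFmode, with its 23-bit mantissa, uses 2**23 the same way.  */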
32856 /* Expand SSE sequence for computing lround from OP1 storing
32859 ix86_expand_lround (rtx op0, rtx op1)
32861 /* C code for the stuff we're doing below:
32862 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
32865 enum machine_mode mode = GET_MODE (op1);
32866 const struct real_format *fmt;
32867 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32870 /* load nextafter (0.5, 0.0) */
32871 fmt = REAL_MODE_FORMAT (mode);
32872 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32873 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32875 /* adj = copysign (0.5, op1) */
32876 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
32877 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
32879 /* adj = op1 + adj */
32880 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
32882 /* op0 = (imode)adj */
32883 expand_fix (op0, adj, 0);
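/* Why nextafter (0.5, 0.0) rather than 0.5 (illustrative): for
   d = 0.49999999999999994, the largest double below 0.5, d + 0.5
   rounds up to 1.0, so lround (d) would wrongly return 1.  Adding the
   predecessor of 0.5 keeps every value below one half from crossing
   over, while exact halfway cases still round away from zero as
   lround requires.  */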
32886 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
32889 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
32891 /* C code for the stuff we're doing below (for do_floor):
32893 xi -= (double)xi > op1 ? 1 : 0;
32896 enum machine_mode fmode = GET_MODE (op1);
32897 enum machine_mode imode = GET_MODE (op0);
32898 rtx ireg, freg, label, tmp;
32900 /* reg = (long)op1 */
32901 ireg = gen_reg_rtx (imode);
32902 expand_fix (ireg, op1, 0);
32904 /* freg = (double)reg */
32905 freg = gen_reg_rtx (fmode);
32906 expand_float (freg, ireg, 0);
32908 /* ireg = (freg > op1) ? ireg - 1 : ireg */
32909 label = ix86_expand_sse_compare_and_jump (UNLE,
32910 freg, op1, !do_floor);
32911 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
32912 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
32913 emit_move_insn (ireg, tmp);
32915 emit_label (label);
32916 LABEL_NUSES (label) = 1;
32918 emit_move_insn (op0, ireg);
32921 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
32922 result in OPERAND0. */
32924 ix86_expand_rint (rtx operand0, rtx operand1)
32926 /* C code for the stuff we're doing below:
32927 xa = fabs (operand1);
32928 if (!isless (xa, 2**52))
32930 xa = xa + 2**52 - 2**52;
32931 return copysign (xa, operand1);
32933 enum machine_mode mode = GET_MODE (operand0);
32934 rtx res, xa, label, TWO52, mask;
32936 res = gen_reg_rtx (mode);
32937 emit_move_insn (res, operand1);
32939 /* xa = abs (operand1) */
32940 xa = ix86_expand_sse_fabs (res, &mask);
32942 /* if (!isless (xa, TWO52)) goto label; */
32943 TWO52 = ix86_gen_TWO52 (mode);
32944 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32946 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32947 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32949 ix86_sse_copysign_to_positive (res, xa, res, mask);
32951 emit_label (label);
32952 LABEL_NUSES (label) = 1;
32954 emit_move_insn (operand0, res);
32957 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32960 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
32962 /* C code for the stuff we expand below.
32963 double xa = fabs (x), x2;
32964 if (!isless (xa, TWO52))
32966 xa = xa + TWO52 - TWO52;
32967 x2 = copysign (xa, x);
32976 enum machine_mode mode = GET_MODE (operand0);
32977 rtx xa, TWO52, tmp, label, one, res, mask;
32979 TWO52 = ix86_gen_TWO52 (mode);
32981 /* Temporary for holding the result, initialized to the input
32982 operand to ease control flow. */
32983 res = gen_reg_rtx (mode);
32984 emit_move_insn (res, operand1);
32986 /* xa = abs (operand1) */
32987 xa = ix86_expand_sse_fabs (res, &mask);
32989 /* if (!isless (xa, TWO52)) goto label; */
32990 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32992 /* xa = xa + TWO52 - TWO52; */
32993 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32994 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32996 /* xa = copysign (xa, operand1) */
32997 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32999 /* generate 1.0 or -1.0 */
33000 one = force_reg (mode,
33001 const_double_from_real_value (do_floor
33002 ? dconst1 : dconstm1, mode));
33004 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
33005 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
33006 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33007 gen_rtx_AND (mode, one, tmp)));
33008 /* We always need to subtract here to preserve signed zero. */
33009 tmp = expand_simple_binop (mode, MINUS,
33010 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
33011 emit_move_insn (res, tmp);
33013 emit_label (label);
33014 LABEL_NUSES (label) = 1;
33016 emit_move_insn (operand0, res);
33019 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
33022 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
33024 /* C code for the stuff we expand below.
33025 double xa = fabs (x), x2;
33026 if (!isless (xa, TWO52))
33028 x2 = (double)(long)x;
33035 if (HONOR_SIGNED_ZEROS (mode))
33036 return copysign (x2, x);
33039 enum machine_mode mode = GET_MODE (operand0);
33040 rtx xa, xi, TWO52, tmp, label, one, res, mask;
33042 TWO52 = ix86_gen_TWO52 (mode);
33044 /* Temporary for holding the result, initialized to the input
33045 operand to ease control flow. */
33046 res = gen_reg_rtx (mode);
33047 emit_move_insn (res, operand1);
33049 /* xa = abs (operand1) */
33050 xa = ix86_expand_sse_fabs (res, &mask);
33052 /* if (!isless (xa, TWO52)) goto label; */
33053 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33055 /* xa = (double)(long)x */
33056 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
33057 expand_fix (xi, res, 0);
33058 expand_float (xa, xi, 0);
33061 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
33063 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
33064 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
33065 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33066 gen_rtx_AND (mode, one, tmp)));
33067 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
33068 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
33069 emit_move_insn (res, tmp);
33071 if (HONOR_SIGNED_ZEROS (mode))
33072 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
33074 emit_label (label);
33075 LABEL_NUSES (label) = 1;
33077 emit_move_insn (operand0, res);
33080 /* Expand SSE sequence for computing round from OPERAND1 storing
33081 into OPERAND0. A sequence that works without relying on DImode truncation
33082 via cvttsd2siq, which is only available on 64-bit targets. */
33084 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
33086 /* C code for the stuff we expand below.
33087 double xa = fabs (x), xa2, x2;
33088 if (!isless (xa, TWO52))
33090 Using the absolute value and copying back sign makes
33091 -0.0 -> -0.0 correct.
33092 xa2 = xa + TWO52 - TWO52;
33097 else if (dxa > 0.5)
33099 x2 = copysign (xa2, x);
33102 enum machine_mode mode = GET_MODE (operand0);
33103 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
33105 TWO52 = ix86_gen_TWO52 (mode);
33107 /* Temporary for holding the result, initialized to the input
33108 operand to ease control flow. */
33109 res = gen_reg_rtx (mode);
33110 emit_move_insn (res, operand1);
33112 /* xa = abs (operand1) */
33113 xa = ix86_expand_sse_fabs (res, &mask);
33115 /* if (!isless (xa, TWO52)) goto label; */
33116 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33118 /* xa2 = xa + TWO52 - TWO52; */
33119 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
33120 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
33122 /* dxa = xa2 - xa; */
33123 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
33125 /* generate 0.5, 1.0 and -0.5 */
33126 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
33127 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
33128 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
33132 tmp = gen_reg_rtx (mode);
33133 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
33134 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
33135 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33136 gen_rtx_AND (mode, one, tmp)));
33137 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
33138 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
33139 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
33140 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33141 gen_rtx_AND (mode, one, tmp)));
33142 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
33144 /* res = copysign (xa2, operand1) */
33145 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
33147 emit_label (label);
33148 LABEL_NUSES (label) = 1;
33150 emit_move_insn (operand0, res);
33153 /* Expand SSE sequence for computing trunc from OPERAND1 storing
33156 ix86_expand_trunc (rtx operand0, rtx operand1)
33158 /* C code for SSE variant we expand below.
33159 double xa = fabs (x), x2;
33160 if (!isless (xa, TWO52))
33162 x2 = (double)(long)x;
33163 if (HONOR_SIGNED_ZEROS (mode))
33164 return copysign (x2, x);
33167 enum machine_mode mode = GET_MODE (operand0);
33168 rtx xa, xi, TWO52, label, res, mask;
33170 TWO52 = ix86_gen_TWO52 (mode);
33172 /* Temporary for holding the result, initialized to the input
33173 operand to ease control flow. */
33174 res = gen_reg_rtx (mode);
33175 emit_move_insn (res, operand1);
33177 /* xa = abs (operand1) */
33178 xa = ix86_expand_sse_fabs (res, &mask);
33180 /* if (!isless (xa, TWO52)) goto label; */
33181 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33183 /* x = (double)(long)x */
33184 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
33185 expand_fix (xi, res, 0);
33186 expand_float (res, xi, 0);
33188 if (HONOR_SIGNED_ZEROS (mode))
33189 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
33191 emit_label (label);
33192 LABEL_NUSES (label) = 1;
33194 emit_move_insn (operand0, res);
33197 /* Expand SSE sequence for computing trunc from OPERAND1 storing
33200 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
33202 enum machine_mode mode = GET_MODE (operand0);
33203 rtx xa, mask, TWO52, label, one, res, smask, tmp;
33205 /* C code for SSE variant we expand below.
33206 double xa = fabs (x), x2;
33207 if (!isless (xa, TWO52))
33209 xa2 = xa + TWO52 - TWO52;
33213 x2 = copysign (xa2, x);
33217 TWO52 = ix86_gen_TWO52 (mode);
33219 /* Temporary for holding the result, initialized to the input
33220 operand to ease control flow. */
33221 res = gen_reg_rtx (mode);
33222 emit_move_insn (res, operand1);
33224 /* xa = abs (operand1) */
33225 xa = ix86_expand_sse_fabs (res, &smask);
33227 /* if (!isless (xa, TWO52)) goto label; */
33228 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33230 /* res = xa + TWO52 - TWO52; */
33231 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
33232 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
33233 emit_move_insn (res, tmp);
33236 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
33238 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
33239 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
33240 emit_insn (gen_rtx_SET (VOIDmode, mask,
33241 gen_rtx_AND (mode, mask, one)));
33242 tmp = expand_simple_binop (mode, MINUS,
33243 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
33244 emit_move_insn (res, tmp);
33246 /* res = copysign (res, operand1) */
33247 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
33249 emit_label (label);
33250 LABEL_NUSES (label) = 1;
33252 emit_move_insn (operand0, res);
33255 /* Expand SSE sequence for computing round from OPERAND1 storing
33258 ix86_expand_round (rtx operand0, rtx operand1)
33260 /* C code for the stuff we're doing below:
33261 double xa = fabs (x);
33262 if (!isless (xa, TWO52))
33264 xa = (double)(long)(xa + nextafter (0.5, 0.0));
33265 return copysign (xa, x);
33267 enum machine_mode mode = GET_MODE (operand0);
33268 rtx res, TWO52, xa, label, xi, half, mask;
33269 const struct real_format *fmt;
33270 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
33272 /* Temporary for holding the result, initialized to the input
33273 operand to ease control flow. */
33274 res = gen_reg_rtx (mode);
33275 emit_move_insn (res, operand1);
33277 TWO52 = ix86_gen_TWO52 (mode);
33278 xa = ix86_expand_sse_fabs (res, &mask);
33279 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33281 /* load nextafter (0.5, 0.0) */
33282 fmt = REAL_MODE_FORMAT (mode);
33283 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
33284 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
33286 /* xa = xa + 0.5 */
33287 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
33288 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
33290 /* xa = (double)(int64_t)xa */
33291 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
33292 expand_fix (xi, xa, 0);
33293 expand_float (xa, xi, 0);
33295 /* res = copysign (xa, operand1) */
33296 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
33298 emit_label (label);
33299 LABEL_NUSES (label) = 1;
33301 emit_move_insn (operand0, res);
33304 /* Expand SSE sequence for computing round
33305 from OP1 storing into OP0 using sse4 round insn. */
33307 ix86_expand_round_sse4 (rtx op0, rtx op1)
33309 enum machine_mode mode = GET_MODE (op0);
33310 rtx e1, e2, res, half;
33311 const struct real_format *fmt;
33312 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
33313 rtx (*gen_copysign) (rtx, rtx, rtx);
33314 rtx (*gen_round) (rtx, rtx, rtx);
33319 gen_copysign = gen_copysignsf3;
33320 gen_round = gen_sse4_1_roundsf2;
33323 gen_copysign = gen_copysigndf3;
33324 gen_round = gen_sse4_1_rounddf2;
33327 gcc_unreachable ();
33330 /* round (a) = trunc (a + copysign (0.5, a)) */
33332 /* load nextafter (0.5, 0.0) */
33333 fmt = REAL_MODE_FORMAT (mode);
33334 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
33335 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
33336 half = const_double_from_real_value (pred_half, mode);
33338 /* e1 = copysign (0.5, op1) */
33339 e1 = gen_reg_rtx (mode);
33340 emit_insn (gen_copysign (e1, half, op1));
33342 /* e2 = op1 + e1 */
33343 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
33345 /* res = trunc (e2) */
33346 res = gen_reg_rtx (mode);
33347 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
33349 emit_move_insn (op0, res);
33353 /* Table of valid machine attributes. */
33354 static const struct attribute_spec ix86_attribute_table[] =
33356 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
33357 affects_type_identity } */
33358 /* Stdcall attribute says callee is responsible for popping arguments
33359 if they are not variable. */
33360 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
33362 /* Fastcall attribute says callee is responsible for popping arguments
33363 if they are not variable. */
33364 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
33366 /* Thiscall attribute says callee is responsible for popping arguments
33367 if they are not variable. */
33368 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
33370 /* Cdecl attribute says the callee is a normal C declaration */
33371 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
33373 /* Regparm attribute specifies how many integer arguments are to be
33374 passed in registers. */
33375 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
33377 /* Sseregparm attribute says we are using x86_64 calling conventions
33378 for FP arguments. */
33379 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
33381 /* force_align_arg_pointer says this function realigns the stack at entry. */
33382 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
33383 false, true, true, ix86_handle_cconv_attribute, false },
33384 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33385 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
33386 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
33387 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
33390 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
33392 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
33394 #ifdef SUBTARGET_ATTRIBUTE_TABLE
33395 SUBTARGET_ATTRIBUTE_TABLE,
33397 /* ms_abi and sysv_abi calling convention function attributes. */
33398 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
33399 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
33400 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
33402 { "callee_pop_aggregate_return", 1, 1, false, true, true,
33403 ix86_handle_callee_pop_aggregate_return, true },
33405 { NULL, 0, 0, false, false, false, NULL, false }
33408 /* Implement targetm.vectorize.builtin_vectorization_cost. */
33410 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
33411 tree vectype ATTRIBUTE_UNUSED,
33412 int misalign ATTRIBUTE_UNUSED)
33414 switch (type_of_cost)
33417 return ix86_cost->scalar_stmt_cost;
33420 return ix86_cost->scalar_load_cost;
33423 return ix86_cost->scalar_store_cost;
33426 return ix86_cost->vec_stmt_cost;
33429 return ix86_cost->vec_align_load_cost;
33432 return ix86_cost->vec_store_cost;
33434 case vec_to_scalar:
33435 return ix86_cost->vec_to_scalar_cost;
33437 case scalar_to_vec:
33438 return ix86_cost->scalar_to_vec_cost;
33440 case unaligned_load:
33441 case unaligned_store:
33442 return ix86_cost->vec_unalign_load_cost;
33444 case cond_branch_taken:
33445 return ix86_cost->cond_taken_branch_cost;
33447 case cond_branch_not_taken:
33448 return ix86_cost->cond_not_taken_branch_cost;
33454 gcc_unreachable ();
33459 /* Implement targetm.vectorize.builtin_vec_perm. */
33462 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
33464 tree itype = TREE_TYPE (vec_type);
33465 bool u = TYPE_UNSIGNED (itype);
33466 enum machine_mode vmode = TYPE_MODE (vec_type);
33467 enum ix86_builtins fcode;
33468 bool ok = TARGET_SSE2;
33474 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
33477 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
33479 itype = ix86_get_builtin_type (IX86_BT_DI);
33484 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
33488 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
33490 itype = ix86_get_builtin_type (IX86_BT_SI);
33494 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
33497 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
33500 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
33503 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
33513 *mask_type = itype;
33514 return ix86_builtins[(int) fcode];
33517 /* Return a vector mode with twice as many elements as VMODE. */
33518 /* ??? Consider moving this to a table generated by genmodes.c. */
33520 static enum machine_mode
33521 doublesize_vector_mode (enum machine_mode vmode)
33525 case V2SFmode: return V4SFmode;
33526 case V1DImode: return V2DImode;
33527 case V2SImode: return V4SImode;
33528 case V4HImode: return V8HImode;
33529 case V8QImode: return V16QImode;
33531 case V2DFmode: return V4DFmode;
33532 case V4SFmode: return V8SFmode;
33533 case V2DImode: return V4DImode;
33534 case V4SImode: return V8SImode;
33535 case V8HImode: return V16HImode;
33536 case V16QImode: return V32QImode;
33538 case V4DFmode: return V8DFmode;
33539 case V8SFmode: return V16SFmode;
33540 case V4DImode: return V8DImode;
33541 case V8SImode: return V16SImode;
33542 case V16HImode: return V32HImode;
33543 case V32QImode: return V64QImode;
33546 gcc_unreachable ();
33550 /* Construct (set target (vec_select op0 (parallel perm))) and
33551 return true if that's a valid instruction in the active ISA. */
33554 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
33556 rtx rperm[MAX_VECT_LEN], x;
33559 for (i = 0; i < nelt; ++i)
33560 rperm[i] = GEN_INT (perm[i]);
33562 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
33563 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
33564 x = gen_rtx_SET (VOIDmode, target, x);
33567 if (recog_memoized (x) < 0)
33575 /* Similar, but generate a vec_concat from op0 and op1 as well. */
33578 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
33579 const unsigned char *perm, unsigned nelt)
33581 enum machine_mode v2mode;
33584 v2mode = doublesize_vector_mode (GET_MODE (op0));
33585 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
33586 return expand_vselect (target, x, perm, nelt);
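/* An illustrative sketch (assumed operands, not emitted verbatim here):
   with V4SI operands and the interleave-low permutation {0, 4, 1, 5},
   expand_vselect_vconcat builds RTL of the shape

     (set (reg:V4SI target)
          (vec_select:V4SI
            (vec_concat:V8SI (reg:V4SI op0) (reg:V4SI op1))
            (parallel [(const_int 0) (const_int 4)
                       (const_int 1) (const_int 5)])))

   which recog_memoized accepts only if some pattern in sse.md matches;
   with SSE2 enabled this example matches the punpckldq pattern.  */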
33589 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33590 in terms of blendp[sd] / pblendw / pblendvb. */
33593 expand_vec_perm_blend (struct expand_vec_perm_d *d)
33595 enum machine_mode vmode = d->vmode;
33596 unsigned i, mask, nelt = d->nelt;
33597 rtx target, op0, op1, x;
33599 if (!TARGET_SSE4_1 || d->op0 == d->op1)
33601 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
33604 /* This is a blend, not a permute. Elements must stay in their
33605 respective lanes. */
33606 for (i = 0; i < nelt; ++i)
33608 unsigned e = d->perm[i];
33609 if (!(e == i || e == i + nelt))
33616 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
33617 decision should be extracted elsewhere, so that we only try that
33618 sequence once all budget==3 options have been tried. */
33620 /* For bytes, see if bytes move in pairs so we can use pblendw with
33621 an immediate argument, rather than pblendvb with a vector argument. */
33622 if (vmode == V16QImode)
33624 bool pblendw_ok = true;
33625 for (i = 0; i < 16 && pblendw_ok; i += 2)
33626 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
33630 rtx rperm[16], vperm;
33632 for (i = 0; i < nelt; ++i)
33633 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
33635 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33636 vperm = force_reg (V16QImode, vperm);
33638 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
33643 target = d->target;
33655 for (i = 0; i < nelt; ++i)
33656 mask |= (d->perm[i] >= nelt) << i;
33660 for (i = 0; i < 2; ++i)
33661 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
33665 for (i = 0; i < 4; ++i)
33666 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
33670 for (i = 0; i < 8; ++i)
33671 mask |= (d->perm[i * 2] >= 16) << i;
33675 target = gen_lowpart (vmode, target);
33676 op0 = gen_lowpart (vmode, op0);
33677 op1 = gen_lowpart (vmode, op1);
33681 gcc_unreachable ();
/* This matches five different patterns across the different modes.  */
33685 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
33686 x = gen_rtx_SET (VOIDmode, target, x);
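/* A worked example of the full-width mask computation above (for
   illustration only): a V8SF blend with perm = {0, 9, 2, 11, 4, 13,
   6, 15} takes every odd element from op1, so mask becomes 0xaa and
   the vec_merge matches vblendps with immediate 0xaa.  Every element
   satisfies e == i or e == i + 8, as the lane check above requires.  */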
33692 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33693 in terms of the variable form of vpermilps.
33695 Note that we will have already failed the immediate input vpermilps,
33696 which requires that the high and low part shuffle be identical; the
33697 variable form doesn't require that. */
33700 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
33702 rtx rperm[8], vperm;
33705 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
33708 /* We can only permute within the 128-bit lane. */
33709 for (i = 0; i < 8; ++i)
33711 unsigned e = d->perm[i];
33712 if (i < 4 ? e >= 4 : e < 4)
33719 for (i = 0; i < 8; ++i)
33721 unsigned e = d->perm[i];
33723 /* Within each 128-bit lane, the elements of op0 are numbered
33724 from 0 and the elements of op1 are numbered from 4. */
33730 rperm[i] = GEN_INT (e);
33733 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
33734 vperm = force_reg (V8SImode, vperm);
33735 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
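/* Illustrative example (assumed input): perm = {1, 0, 3, 2, 5, 4, 7, 6},
   which swaps adjacent pairs within each 128-bit lane, yields the
   control vector {1, 0, 3, 2, 1, 0, 3, 2}; vpermilps consumes a 2-bit
   index per element, applied to each lane independently.  */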
33740 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33741 in terms of pshufb or vpperm. */
33744 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
33746 unsigned i, nelt, eltsz;
33747 rtx rperm[16], vperm, target, op0, op1;
33749 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
33751 if (GET_MODE_SIZE (d->vmode) != 16)
33758 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33760 for (i = 0; i < nelt; ++i)
33762 unsigned j, e = d->perm[i];
33763 for (j = 0; j < eltsz; ++j)
33764 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
33767 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33768 vperm = force_reg (V16QImode, vperm);
33770 target = gen_lowpart (V16QImode, d->target);
33771 op0 = gen_lowpart (V16QImode, d->op0);
33772 if (d->op0 == d->op1)
33773 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
33776 op1 = gen_lowpart (V16QImode, d->op1);
33777 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
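/* Sketch of the byte-mask expansion above (illustration only): for a
   V8HI permutation eltsz == 2, so element index e becomes the byte pair
   {2*e, 2*e+1}.  E.g. perm = {1, 0, 2, 3, 4, 5, 6, 7} produces the
   V16QI control {2,3, 0,1, 4,5, 6,7, ..., 14,15}.  With one input this
   feeds pshufb; with two distinct inputs, XOP vpperm interprets byte
   indices 0..31 across both operands.  */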
33783 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
33784 in a single instruction. */
33787 expand_vec_perm_1 (struct expand_vec_perm_d *d)
33789 unsigned i, nelt = d->nelt;
33790 unsigned char perm2[MAX_VECT_LEN];
33792 /* Check plain VEC_SELECT first, because AVX has instructions that could
33793 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
33794 input where SEL+CONCAT may not. */
33795 if (d->op0 == d->op1)
33797 int mask = nelt - 1;
33799 for (i = 0; i < nelt; i++)
33800 perm2[i] = d->perm[i] & mask;
33802 if (expand_vselect (d->target, d->op0, perm2, nelt))
33805 /* There are plenty of patterns in sse.md that are written for
33806 SEL+CONCAT and are not replicated for a single op. Perhaps
33807 that should be changed, to avoid the nastiness here. */
33809 /* Recognize interleave style patterns, which means incrementing
33810 every other permutation operand. */
33811 for (i = 0; i < nelt; i += 2)
33813 perm2[i] = d->perm[i] & mask;
33814 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
33816 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33819 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
33822 for (i = 0; i < nelt; i += 4)
33824 perm2[i + 0] = d->perm[i + 0] & mask;
33825 perm2[i + 1] = d->perm[i + 1] & mask;
33826 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
33827 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
33830 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33835 /* Finally, try the fully general two operand permute. */
33836 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
33839 /* Recognize interleave style patterns with reversed operands. */
33840 if (d->op0 != d->op1)
33842 for (i = 0; i < nelt; ++i)
33844 unsigned e = d->perm[i];
33852 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
33856 /* Try the SSE4.1 blend variable merge instructions. */
33857 if (expand_vec_perm_blend (d))
33860 /* Try one of the AVX vpermil variable permutations. */
33861 if (expand_vec_perm_vpermil (d))
33864 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
33865 if (expand_vec_perm_pshufb (d))
33871 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33872 in terms of a pair of pshuflw + pshufhw instructions. */
33875 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
33877 unsigned char perm2[MAX_VECT_LEN];
33881 if (d->vmode != V8HImode || d->op0 != d->op1)
33884 /* The two permutations only operate in 64-bit lanes. */
33885 for (i = 0; i < 4; ++i)
33886 if (d->perm[i] >= 4)
33888 for (i = 4; i < 8; ++i)
33889 if (d->perm[i] < 4)
33895 /* Emit the pshuflw. */
33896 memcpy (perm2, d->perm, 4);
33897 for (i = 4; i < 8; ++i)
33899 ok = expand_vselect (d->target, d->op0, perm2, 8);
33902 /* Emit the pshufhw. */
33903 memcpy (perm2 + 4, d->perm + 4, 4);
33904 for (i = 0; i < 4; ++i)
33906 ok = expand_vselect (d->target, d->target, perm2, 8);
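/* Worked example (illustrative): for perm = {2, 0, 3, 1, 7, 5, 6, 4} on
   V8HI, the first expand_vselect uses {2, 0, 3, 1, 4, 5, 6, 7} (a
   pshuflw that leaves the high half alone) and the second uses
   {0, 1, 2, 3, 7, 5, 6, 4} on the intermediate result (a pshufhw that
   leaves the low half alone).  */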
33912 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33913 the permutation using the SSSE3 palignr instruction. This succeeds
33914 when all of the elements in PERM fit within one vector and we merely
33915 need to shift them down so that a single vector permutation has a
33916 chance to succeed. */
33919 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
33921 unsigned i, nelt = d->nelt;
33926 /* Even with AVX, palignr only operates on 128-bit vectors. */
33927 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33930 min = nelt, max = 0;
33931 for (i = 0; i < nelt; ++i)
33933 unsigned e = d->perm[i];
33939 if (min == 0 || max - min >= nelt)
33942 /* Given that we have SSSE3, we know we'll be able to implement the
33943 single operand permutation after the palignr with pshufb. */
33947 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
33948 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
33949 gen_lowpart (TImode, d->op1),
33950 gen_lowpart (TImode, d->op0), shift));
33952 d->op0 = d->op1 = d->target;
33955 for (i = 0; i < nelt; ++i)
33957 unsigned e = d->perm[i] - min;
33963 /* Test for the degenerate case where the alignment by itself
33964 produces the desired permutation. */
33968 ok = expand_vec_perm_1 (d);
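/* Worked example (illustrative): for V8HI with perm = {3, 4, 5, 6, 7,
   8, 9, 10}, min == 3 and max - min == 7 < nelt, so palignr shifts the
   op1:op0 concatenation down by 3 * 16 bits.  The residual permutation
   is then {0, 1, ..., 7}, the identity, i.e. the degenerate case where
   the alignment alone already produces the result.  */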
33974 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33975 a two vector permutation into a single vector permutation by using
33976 an interleave operation to merge the vectors. */
33979 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
33981 struct expand_vec_perm_d dremap, dfinal;
33982 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
33983 unsigned contents, h1, h2, h3, h4;
33984 unsigned char remap[2 * MAX_VECT_LEN];
33988 if (d->op0 == d->op1)
33991 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33992 lanes. We can use similar techniques with the vperm2f128 instruction,
33993 but it requires slightly different logic. */
33994 if (GET_MODE_SIZE (d->vmode) != 16)
33997 /* Examine from whence the elements come. */
33999 for (i = 0; i < nelt; ++i)
34000 contents |= 1u << d->perm[i];
34002 /* Split the two input vectors into 4 halves. */
34003 h1 = (1u << nelt2) - 1;
34008 memset (remap, 0xff, sizeof (remap));
/* If all of the elements come from the low halves, use interleave low;
   similarly for interleave high.  If the elements come from mis-matched
   halves, we can use shufps for V4SF/V4SI or do a DImode shuffle.  */
34014 if ((contents & (h1 | h3)) == contents)
34016 for (i = 0; i < nelt2; ++i)
34019 remap[i + nelt] = i * 2 + 1;
34020 dremap.perm[i * 2] = i;
34021 dremap.perm[i * 2 + 1] = i + nelt;
34024 else if ((contents & (h2 | h4)) == contents)
34026 for (i = 0; i < nelt2; ++i)
34028 remap[i + nelt2] = i * 2;
34029 remap[i + nelt + nelt2] = i * 2 + 1;
34030 dremap.perm[i * 2] = i + nelt2;
34031 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
34034 else if ((contents & (h1 | h4)) == contents)
34036 for (i = 0; i < nelt2; ++i)
34039 remap[i + nelt + nelt2] = i + nelt2;
34040 dremap.perm[i] = i;
34041 dremap.perm[i + nelt2] = i + nelt + nelt2;
34045 dremap.vmode = V2DImode;
34047 dremap.perm[0] = 0;
34048 dremap.perm[1] = 3;
34051 else if ((contents & (h2 | h3)) == contents)
34053 for (i = 0; i < nelt2; ++i)
34055 remap[i + nelt2] = i;
34056 remap[i + nelt] = i + nelt2;
34057 dremap.perm[i] = i + nelt2;
34058 dremap.perm[i + nelt2] = i + nelt;
34062 dremap.vmode = V2DImode;
34064 dremap.perm[0] = 1;
34065 dremap.perm[1] = 2;
34071 /* Use the remapping array set up above to move the elements from their
34072 swizzled locations into their final destinations. */
34074 for (i = 0; i < nelt; ++i)
34076 unsigned e = remap[d->perm[i]];
34077 gcc_assert (e < nelt);
34078 dfinal.perm[i] = e;
34080 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
34081 dfinal.op1 = dfinal.op0;
34082 dremap.target = dfinal.op0;
34084 /* Test if the final remap can be done with a single insn. For V4SFmode or
34085 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
34087 ok = expand_vec_perm_1 (&dfinal);
34088 seq = get_insns ();
34094 if (dremap.vmode != dfinal.vmode)
34096 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
34097 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
34098 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
34101 ok = expand_vec_perm_1 (&dremap);
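/* Worked example (illustrative): for V4SI with perm = {0, 1, 4, 5},
   contents has bits {0, 1, 4, 5} set and is covered by h1 | h3, so the
   interleave-low arm applies: remap maps 0->0, 1->2, 4->1, 5->3,
   dremap.perm becomes {0, 4, 1, 5} (a punpckldq), and dfinal.perm
   becomes {0, 2, 1, 3}, a single pshufd on the merged vector.  */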
34108 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
34109 permutation with two pshufb insns and an ior. We should have already
failed all two-instruction sequences.  */
34113 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
34115 rtx rperm[2][16], vperm, l, h, op, m128;
34116 unsigned int i, nelt, eltsz;
34118 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
34120 gcc_assert (d->op0 != d->op1);
34123 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
34125 /* Generate two permutation masks. If the required element is within
34126 the given vector it is shuffled into the proper lane. If the required
34127 element is in the other vector, force a zero into the lane by setting
34128 bit 7 in the permutation mask. */
34129 m128 = GEN_INT (-128);
34130 for (i = 0; i < nelt; ++i)
34132 unsigned j, e = d->perm[i];
34133 unsigned which = (e >= nelt);
34137 for (j = 0; j < eltsz; ++j)
34139 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
34140 rperm[1-which][i*eltsz + j] = m128;
34144 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
34145 vperm = force_reg (V16QImode, vperm);
34147 l = gen_reg_rtx (V16QImode);
34148 op = gen_lowpart (V16QImode, d->op0);
34149 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
34151 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
34152 vperm = force_reg (V16QImode, vperm);
34154 h = gen_reg_rtx (V16QImode);
34155 op = gen_lowpart (V16QImode, d->op1);
34156 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
34158 op = gen_lowpart (V16QImode, d->target);
34159 emit_insn (gen_iorv16qi3 (op, l, h));
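/* Sketch (illustration only): if element i of the result wants byte e,
   the first control vector holds e when e < 16 (taken from op0) and
   -128 otherwise, while the second holds e - 16 when e >= 16 (taken
   from op1) and -128 otherwise.  Bit 7 of a pshufb control byte zeroes
   the lane, so the final ior simply merges the two partial results.  */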
34164 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
34165 and extract-odd permutations. */
34168 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
34175 t1 = gen_reg_rtx (V4DFmode);
34176 t2 = gen_reg_rtx (V4DFmode);
34178 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
34179 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
34180 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
34182 /* Now an unpck[lh]pd will produce the result required. */
34184 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
34186 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
34192 int mask = odd ? 0xdd : 0x88;
34194 t1 = gen_reg_rtx (V8SFmode);
34195 t2 = gen_reg_rtx (V8SFmode);
34196 t3 = gen_reg_rtx (V8SFmode);
34198 /* Shuffle within the 128-bit lanes to produce:
34199 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
34200 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
34203 /* Shuffle the lanes around to produce:
34204 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
34205 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
34208 /* Shuffle within the 128-bit lanes to produce:
34209 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
34210 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
34212 /* Shuffle within the 128-bit lanes to produce:
34213 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
34214 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
34216 /* Shuffle the lanes around to produce:
34217 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
34218 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
34227 /* These are always directly implementable by expand_vec_perm_1. */
34228 gcc_unreachable ();
34232 return expand_vec_perm_pshufb2 (d);
34235 /* We need 2*log2(N)-1 operations to achieve odd/even
34236 with interleave. */
34237 t1 = gen_reg_rtx (V8HImode);
34238 t2 = gen_reg_rtx (V8HImode);
34239 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
34240 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
34241 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
34242 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
34244 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
34246 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
34253 return expand_vec_perm_pshufb2 (d);
34256 t1 = gen_reg_rtx (V16QImode);
34257 t2 = gen_reg_rtx (V16QImode);
34258 t3 = gen_reg_rtx (V16QImode);
34259 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
34260 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
34261 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
34262 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
34263 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
34264 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
34266 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
34268 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
34274 gcc_unreachable ();
34280 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
34281 extract-even and extract-odd permutations. */
34284 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
34286 unsigned i, odd, nelt = d->nelt;
odd = d->perm[0];
if (odd != 0 && odd != 1)
  return false;
for (i = 1; i < nelt; ++i)
  if (d->perm[i] != 2 * i + odd)
    return false;
34296 return expand_vec_perm_even_odd_1 (d, odd);
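/* Example of the pattern match above (illustrative): for V4SI,
   perm = {1, 3, 5, 7} gives odd == 1 and satisfies perm[i] == 2*i + 1
   for every i, so it is the extract-odd permutation; {0, 2, 4, 6}
   likewise matches extract-even with odd == 0.  */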
34299 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
34300 permutations. We assume that expand_vec_perm_1 has already failed. */
34303 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
34305 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
34306 enum machine_mode vmode = d->vmode;
34307 unsigned char perm2[4];
34315 /* These are special-cased in sse.md so that we can optionally
34316 use the vbroadcast instruction. They expand to two insns
34317 if the input happens to be in a register. */
34318 gcc_unreachable ();
34324 /* These are always implementable using standard shuffle patterns. */
34325 gcc_unreachable ();
/* These can be implemented via interleave.  We save one insn by
   stopping once we have promoted to V4SImode and then using pshufd.  */
34333 optab otab = vec_interleave_low_optab;
34337 otab = vec_interleave_high_optab;
34342 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
34343 vmode = get_mode_wider_vector (vmode);
34344 op0 = gen_lowpart (vmode, op0);
34346 while (vmode != V4SImode);
34348 memset (perm2, elt, 4);
34349 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
34354 gcc_unreachable ();
34358 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
34359 broadcast permutations. */
34362 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
34364 unsigned i, elt, nelt = d->nelt;
if (d->op0 != d->op1)
  return false;
elt = d->perm[0];
for (i = 1; i < nelt; ++i)
  if (d->perm[i] != elt)
    return false;
34374 return expand_vec_perm_broadcast_1 (d);
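/* Worked example for the V8HI arm of expand_vec_perm_broadcast_1
   (illustrative): broadcasting element 2 first interleaves the vector
   with itself (punpcklwd), which leaves the desired word duplicated in
   dword 2 of the widened V4SI value; the loop then stops and a single
   pshufd with control {2, 2, 2, 2} completes the broadcast.  */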
34377 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
34378 With all of the interface bits taken care of, perform the expansion
34379 in D and return true on success. */
34382 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
34384 /* Try a single instruction expansion. */
34385 if (expand_vec_perm_1 (d))
34388 /* Try sequences of two instructions. */
34390 if (expand_vec_perm_pshuflw_pshufhw (d))
34393 if (expand_vec_perm_palignr (d))
34396 if (expand_vec_perm_interleave2 (d))
34399 if (expand_vec_perm_broadcast (d))
34402 /* Try sequences of three instructions. */
34404 if (expand_vec_perm_pshufb2 (d))
34407 /* ??? Look for narrow permutations whose element orderings would
34408 allow the promotion to a wider mode. */
34410 /* ??? Look for sequences of interleave or a wider permute that place
34411 the data into the correct lanes for a half-vector shuffle like
34412 pshuf[lh]w or vpermilps. */
34414 /* ??? Look for sequences of interleave that produce the desired results.
34415 The combinatorics of punpck[lh] get pretty ugly... */
34417 if (expand_vec_perm_even_odd (d))
34423 /* Extract the values from the vector CST into the permutation array in D.
34424 Return 0 on error, 1 if all values from the permutation come from the
34425 first vector, 2 if all values from the second vector, and 3 otherwise. */
34428 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
34430 tree list = TREE_VECTOR_CST_ELTS (cst);
34431 unsigned i, nelt = d->nelt;
34434 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
34436 unsigned HOST_WIDE_INT e;
34438 if (!host_integerp (TREE_VALUE (list), 1))
34440 e = tree_low_cst (TREE_VALUE (list), 1);
34444 ret |= (e < nelt ? 1 : 2);
34447 gcc_assert (list == NULL);
/* For all elements from the second vector, fold the elements to the first.  */
34451 for (i = 0; i < nelt; ++i)
34452 d->perm[i] -= nelt;
34458 ix86_expand_vec_perm_builtin (tree exp)
34460 struct expand_vec_perm_d d;
34461 tree arg0, arg1, arg2;
34463 arg0 = CALL_EXPR_ARG (exp, 0);
34464 arg1 = CALL_EXPR_ARG (exp, 1);
34465 arg2 = CALL_EXPR_ARG (exp, 2);
34467 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
34468 d.nelt = GET_MODE_NUNITS (d.vmode);
34469 d.testing_p = false;
34470 gcc_assert (VECTOR_MODE_P (d.vmode));
34472 if (TREE_CODE (arg2) != VECTOR_CST)
34474 error_at (EXPR_LOCATION (exp),
34475 "vector permutation requires vector constant");
34479 switch (extract_vec_perm_cst (&d, arg2))
34485 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
34489 if (!operand_equal_p (arg0, arg1, 0))
34491 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
34492 d.op0 = force_reg (d.vmode, d.op0);
34493 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
34494 d.op1 = force_reg (d.vmode, d.op1);
/* The elements of PERM do not suggest that only the first operand
   is used, but both operands are identical.  Allow easier matching
   of the permutation by folding the permutation into the single
   input vector.  */
34503 unsigned i, nelt = d.nelt;
34504 for (i = 0; i < nelt; ++i)
34505 if (d.perm[i] >= nelt)
34511 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
34512 d.op0 = force_reg (d.vmode, d.op0);
34517 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
34518 d.op0 = force_reg (d.vmode, d.op0);
34523 d.target = gen_reg_rtx (d.vmode);
34524 if (ix86_expand_vec_perm_builtin_1 (&d))
/* For compiler generated permutations, we should never get here, because
   the compiler should also be checking the ok hook.  But since this is a
   builtin the user has access to, don't abort.  */
34533 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
34536 sorry ("vector permutation (%d %d %d %d)",
34537 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
34540 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
34541 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
34542 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
34545 sorry ("vector permutation "
34546 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
34547 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
34548 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
34549 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
34550 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
34553 gcc_unreachable ();
34556 return CONST0_RTX (d.vmode);
34559 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
34562 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
34564 struct expand_vec_perm_d d;
34568 d.vmode = TYPE_MODE (vec_type);
34569 d.nelt = GET_MODE_NUNITS (d.vmode);
34570 d.testing_p = true;
34572 /* Given sufficient ISA support we can just return true here
34573 for selected vector modes. */
34574 if (GET_MODE_SIZE (d.vmode) == 16)
34576 /* All implementable with a single vpperm insn. */
34579 /* All implementable with 2 pshufb + 1 ior. */
34582 /* All implementable with shufpd or unpck[lh]pd. */
34587 vec_mask = extract_vec_perm_cst (&d, mask);
/* This hook cannot be called in response to something that the
   user does (unlike the builtin expander) so we shouldn't ever see
   an error generated from the extract.  */
34592 gcc_assert (vec_mask > 0 && vec_mask <= 3);
34593 one_vec = (vec_mask != 3);
34595 /* Implementable with shufps or pshufd. */
34596 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
34599 /* Otherwise we have to go through the motions and see if we can
34600 figure out how to generate the requested permutation. */
34601 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
34602 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
34604 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
34607 ret = ix86_expand_vec_perm_builtin_1 (&d);
34614 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
34616 struct expand_vec_perm_d d;
34622 d.vmode = GET_MODE (targ);
34623 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
34624 d.testing_p = false;
34626 for (i = 0; i < nelt; ++i)
34627 d.perm[i] = i * 2 + odd;
34629 /* We'll either be able to implement the permutation directly... */
34630 if (expand_vec_perm_1 (&d))
34633 /* ... or we use the special-case patterns. */
34634 expand_vec_perm_even_odd_1 (&d, odd);
34637 /* Expand an insert into a vector register through pinsr insn.
34638 Return true if successful. */
34641 ix86_expand_pinsr (rtx *operands)
34643 rtx dst = operands[0];
34644 rtx src = operands[3];
34646 unsigned int size = INTVAL (operands[1]);
34647 unsigned int pos = INTVAL (operands[2]);
34649 if (GET_CODE (dst) == SUBREG)
34651 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
34652 dst = SUBREG_REG (dst);
34655 if (GET_CODE (src) == SUBREG)
34656 src = SUBREG_REG (src);
34658 switch (GET_MODE (dst))
34665 enum machine_mode srcmode, dstmode;
34666 rtx (*pinsr)(rtx, rtx, rtx, rtx);
34668 srcmode = mode_for_size (size, MODE_INT, 0);
34673 if (!TARGET_SSE4_1)
34675 dstmode = V16QImode;
34676 pinsr = gen_sse4_1_pinsrb;
34682 dstmode = V8HImode;
34683 pinsr = gen_sse2_pinsrw;
34687 if (!TARGET_SSE4_1)
34689 dstmode = V4SImode;
34690 pinsr = gen_sse4_1_pinsrd;
34694 gcc_assert (TARGET_64BIT);
34695 if (!TARGET_SSE4_1)
34697 dstmode = V2DImode;
34698 pinsr = gen_sse4_1_pinsrq;
34705 dst = gen_lowpart (dstmode, dst);
34706 src = gen_lowpart (srcmode, src);
34710 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
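/* Illustrative example (assumed operand values): inserting a 16-bit
   value at bit position 32 of a 64-bit destination selects srcmode ==
   HImode and dstmode == V8HImode; with POS scaled down to the element
   index 2, the emitted insn is a pinsrw whose selector immediate is
   1 << 2.  */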
/* This function returns the calling-ABI-specific va_list type node.
   It returns the FNDECL-specific va_list type.  */
34723 ix86_fn_abi_va_list (tree fndecl)
34726 return va_list_type_node;
34727 gcc_assert (fndecl != NULL_TREE);
34729 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
34730 return ms_va_list_type_node;
34732 return sysv_va_list_type_node;
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */
34739 ix86_canonical_va_list_type (tree type)
34743 /* Resolve references and pointers to va_list type. */
34744 if (TREE_CODE (type) == MEM_REF)
34745 type = TREE_TYPE (type);
else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
34747 type = TREE_TYPE (type);
34748 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
34749 type = TREE_TYPE (type);
34751 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
34753 wtype = va_list_type_node;
34754 gcc_assert (wtype != NULL_TREE);
34756 if (TREE_CODE (wtype) == ARRAY_TYPE)
34758 /* If va_list is an array type, the argument may have decayed
34759 to a pointer type, e.g. by being passed to another function.
34760 In that case, unwrap both types so that we can compare the
34761 underlying records. */
34762 if (TREE_CODE (htype) == ARRAY_TYPE
34763 || POINTER_TYPE_P (htype))
34765 wtype = TREE_TYPE (wtype);
34766 htype = TREE_TYPE (htype);
34769 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34770 return va_list_type_node;
34771 wtype = sysv_va_list_type_node;
34772 gcc_assert (wtype != NULL_TREE);
34774 if (TREE_CODE (wtype) == ARRAY_TYPE)
34776 /* If va_list is an array type, the argument may have decayed
34777 to a pointer type, e.g. by being passed to another function.
34778 In that case, unwrap both types so that we can compare the
34779 underlying records. */
34780 if (TREE_CODE (htype) == ARRAY_TYPE
34781 || POINTER_TYPE_P (htype))
34783 wtype = TREE_TYPE (wtype);
34784 htype = TREE_TYPE (htype);
34787 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34788 return sysv_va_list_type_node;
34789 wtype = ms_va_list_type_node;
34790 gcc_assert (wtype != NULL_TREE);
34792 if (TREE_CODE (wtype) == ARRAY_TYPE)
34794 /* If va_list is an array type, the argument may have decayed
34795 to a pointer type, e.g. by being passed to another function.
34796 In that case, unwrap both types so that we can compare the
34797 underlying records. */
34798 if (TREE_CODE (htype) == ARRAY_TYPE
34799 || POINTER_TYPE_P (htype))
34801 wtype = TREE_TYPE (wtype);
34802 htype = TREE_TYPE (htype);
34805 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34806 return ms_va_list_type_node;
34809 return std_canonical_va_list_type (type);
34812 /* Iterate through the target-specific builtin types for va_list.
34813 IDX denotes the iterator, *PTREE is set to the result type of
34814 the va_list builtin, and *PNAME to its internal type.
34815 Returns zero if there is no element for this index, otherwise
34816 IDX should be increased upon the next call.
34817 Note, do not iterate a base builtin's name like __builtin_va_list.
34818 Used from c_common_nodes_and_builtins. */
34821 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
34831 *ptree = ms_va_list_type_node;
34832 *pname = "__builtin_ms_va_list";
34836 *ptree = sysv_va_list_type_node;
34837 *pname = "__builtin_sysv_va_list";
34845 #undef TARGET_SCHED_DISPATCH
34846 #define TARGET_SCHED_DISPATCH has_dispatch
34847 #undef TARGET_SCHED_DISPATCH_DO
34848 #define TARGET_SCHED_DISPATCH_DO do_dispatch
34850 /* The size of the dispatch window is the total number of bytes of
34851 object code allowed in a window. */
34852 #define DISPATCH_WINDOW_SIZE 16
34854 /* Number of dispatch windows considered for scheduling. */
34855 #define MAX_DISPATCH_WINDOWS 3
34857 /* Maximum number of instructions in a window. */
34860 /* Maximum number of immediate operands in a window. */
34863 /* Maximum number of immediate bits allowed in a window. */
34864 #define MAX_IMM_SIZE 128
34866 /* Maximum number of 32 bit immediates allowed in a window. */
34867 #define MAX_IMM_32 4
34869 /* Maximum number of 64 bit immediates allowed in a window. */
34870 #define MAX_IMM_64 2
34872 /* Maximum total of loads or prefetches allowed in a window. */
34875 /* Maximum total of stores allowed in a window. */
34876 #define MAX_STORE 1
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
34883 enum dispatch_group {
/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of these kinds of operations does not have any
   effect in a dispatch window, but we need them for other reasons in
   the table.  */
34903 static unsigned int num_allowable_groups[disp_last] = {
34904 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
34907 char group_name[disp_last + 1][16] = {
34908 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
34909 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
34910 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
34913 /* Instruction path. */
34916 path_single, /* Single micro op. */
34917 path_double, /* Double micro op. */
path_multi, /* Instructions with more than 2 micro ops.  */
34922 /* sched_insn_info defines a window to the instructions scheduled in
34923 the basic block. It contains a pointer to the insn_info table and
34924 the instruction scheduled.
Windows are allocated for each basic block and are linked together.  */
34928 typedef struct sched_insn_info_s {
34930 enum dispatch_group group;
34931 enum insn_path path;
34936 /* Linked list of dispatch windows. This is a two way list of
34937 dispatch windows of a basic block. It contains information about
34938 the number of uops in the window and the total number of
instructions and of bytes in the object code for this dispatch window.  */
34941 typedef struct dispatch_windows_s {
int num_insn; /* Number of insns in the window. */
int num_uops; /* Number of uops in the window. */
int window_size; /* Number of bytes in the window. */
int window_num; /* Window number, 0 or 1. */
int num_imm; /* Number of immediates in the window. */
int num_imm_32; /* Number of 32 bit immediates in the window. */
int num_imm_64; /* Number of 64 bit immediates in the window. */
int imm_size; /* Total size in bytes of immediates in the window. */
34950 int num_loads; /* Total memory loads in the window. */
34951 int num_stores; /* Total memory stores in the window. */
34952 int violation; /* Violation exists in window. */
34953 sched_insn_info *window; /* Pointer to the window. */
34954 struct dispatch_windows_s *next;
34955 struct dispatch_windows_s *prev;
34956 } dispatch_windows;
/* Immediate values used in an insn. */
34959 typedef struct imm_info_s
34966 static dispatch_windows *dispatch_window_list;
34967 static dispatch_windows *dispatch_window_list1;
34969 /* Get dispatch group of insn. */
34971 static enum dispatch_group
34972 get_mem_group (rtx insn)
34974 enum attr_memory memory;
34976 if (INSN_CODE (insn) < 0)
34977 return disp_no_group;
34978 memory = get_attr_memory (insn);
34979 if (memory == MEMORY_STORE)
34982 if (memory == MEMORY_LOAD)
34985 if (memory == MEMORY_BOTH)
34986 return disp_load_store;
34988 return disp_no_group;
34991 /* Return true if insn is a compare instruction. */
34996 enum attr_type type;
34998 type = get_attr_type (insn);
34999 return (type == TYPE_TEST
35000 || type == TYPE_ICMP
35001 || type == TYPE_FCMP
35002 || GET_CODE (PATTERN (insn)) == COMPARE);
/* Return true if a dispatch violation was encountered. */
35008 dispatch_violation (void)
35010 if (dispatch_window_list->next)
35011 return dispatch_window_list->next->violation;
35012 return dispatch_window_list->violation;
35015 /* Return true if insn is a branch instruction. */
35018 is_branch (rtx insn)
35020 return (CALL_P (insn) || JUMP_P (insn));
35023 /* Return true if insn is a prefetch instruction. */
35026 is_prefetch (rtx insn)
35028 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
35031 /* This function initializes a dispatch window and the list container holding a
35032 pointer to the window. */
35035 init_window (int window_num)
35038 dispatch_windows *new_list;
35040 if (window_num == 0)
35041 new_list = dispatch_window_list;
35043 new_list = dispatch_window_list1;
35045 new_list->num_insn = 0;
35046 new_list->num_uops = 0;
35047 new_list->window_size = 0;
35048 new_list->next = NULL;
35049 new_list->prev = NULL;
35050 new_list->window_num = window_num;
35051 new_list->num_imm = 0;
35052 new_list->num_imm_32 = 0;
35053 new_list->num_imm_64 = 0;
35054 new_list->imm_size = 0;
35055 new_list->num_loads = 0;
35056 new_list->num_stores = 0;
35057 new_list->violation = false;
35059 for (i = 0; i < MAX_INSN; i++)
35061 new_list->window[i].insn = NULL;
35062 new_list->window[i].group = disp_no_group;
35063 new_list->window[i].path = no_path;
35064 new_list->window[i].byte_len = 0;
35065 new_list->window[i].imm_bytes = 0;
35070 /* This function allocates and initializes a dispatch window and the
35071 list container holding a pointer to the window. */
35073 static dispatch_windows *
35074 allocate_window (void)
35076 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
35077 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
35082 /* This routine initializes the dispatch scheduling information. It
35083 initiates building dispatch scheduler tables and constructs the
35084 first dispatch window. */
35087 init_dispatch_sched (void)
35089 /* Allocate a dispatch list and a window. */
35090 dispatch_window_list = allocate_window ();
35091 dispatch_window_list1 = allocate_window ();
35096 /* This function returns true if a branch is detected. End of a basic block
does not have to be a branch, but here we assume only branches end a
basic block.  */
35101 is_end_basic_block (enum dispatch_group group)
35103 return group == disp_branch;
35106 /* This function is called when the end of a window processing is reached. */
35109 process_end_window (void)
35111 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
35112 if (dispatch_window_list->next)
35114 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
35115 gcc_assert (dispatch_window_list->window_size
35116 + dispatch_window_list1->window_size <= 48);
35122 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
35123 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
for 48 bytes of instructions.  Note that these windows are not dispatch
windows whose size is DISPATCH_WINDOW_SIZE.  */
35127 static dispatch_windows *
35128 allocate_next_window (int window_num)
35130 if (window_num == 0)
35132 if (dispatch_window_list->next)
35135 return dispatch_window_list;
35138 dispatch_window_list->next = dispatch_window_list1;
35139 dispatch_window_list1->prev = dispatch_window_list;
35141 return dispatch_window_list1;
35144 /* Increment the number of immediate operands of an instruction. */
35147 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
switch (GET_CODE (*in_rtx))
35157 (imm_values->imm)++;
35158 if (x86_64_immediate_operand (*in_rtx, SImode))
35159 (imm_values->imm32)++;
35161 (imm_values->imm64)++;
35165 (imm_values->imm)++;
35166 (imm_values->imm64)++;
35170 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
35172 (imm_values->imm)++;
35173 (imm_values->imm32)++;
35184 /* Compute number of immediate operands of an instruction. */
35187 find_constant (rtx in_rtx, imm_info *imm_values)
35189 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
35190 (rtx_function) find_constant_1, (void *) imm_values);
/* Return the total size of immediate operands of an instruction along with
   the number of corresponding immediate operands.  It initializes its
   parameters to zero before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */
35201 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
35203 imm_info imm_values = {0, 0, 0};
35205 find_constant (insn, &imm_values);
35206 *imm = imm_values.imm;
35207 *imm32 = imm_values.imm32;
35208 *imm64 = imm_values.imm64;
35209 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
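/* Worked example (illustrative): an insn with one constant that fits in
   a sign-extended 32-bit immediate and one constant that needs all 64
   bits reports imm == 2, imm32 == 1 and imm64 == 1, for a total
   immediate size of 1*4 + 1*8 == 12 bytes.  */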
/* This function indicates if an operand of an instruction is an
   immediate.  */
35216 has_immediate (rtx insn)
35218 int num_imm_operand;
35219 int num_imm32_operand;
35220 int num_imm64_operand;
35223 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
35224 &num_imm64_operand);
35228 /* Return single or double path for instructions. */
35230 static enum insn_path
35231 get_insn_path (rtx insn)
35233 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
35235 if ((int)path == 0)
35236 return path_single;
35238 if ((int)path == 1)
35239 return path_double;
35244 /* Return insn dispatch group. */
35246 static enum dispatch_group
35247 get_insn_group (rtx insn)
35249 enum dispatch_group group = get_mem_group (insn);
35253 if (is_branch (insn))
35254 return disp_branch;
35259 if (has_immediate (insn))
35262 if (is_prefetch (insn))
35263 return disp_prefetch;
35265 return disp_no_group;
35268 /* Count number of GROUP restricted instructions in a dispatch
35269 window WINDOW_LIST. */
35272 count_num_restricted (rtx insn, dispatch_windows *window_list)
35274 enum dispatch_group group = get_insn_group (insn);
35276 int num_imm_operand;
35277 int num_imm32_operand;
35278 int num_imm64_operand;
35280 if (group == disp_no_group)
35283 if (group == disp_imm)
35285 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
35286 &num_imm64_operand);
35287 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
35288 || num_imm_operand + window_list->num_imm > MAX_IMM
35289 || (num_imm32_operand > 0
35290 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
35291 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
35292 || (num_imm64_operand > 0
35293 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
35294 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
35295 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
35296 && num_imm64_operand > 0
35297 && ((window_list->num_imm_64 > 0
35298 && window_list->num_insn >= 2)
35299 || window_list->num_insn >= 3)))
35305 if ((group == disp_load_store
35306 && (window_list->num_loads >= MAX_LOAD
35307 || window_list->num_stores >= MAX_STORE))
35308 || ((group == disp_load
35309 || group == disp_prefetch)
35310 && window_list->num_loads >= MAX_LOAD)
35311 || (group == disp_store
35312 && window_list->num_stores >= MAX_STORE))
35318 /* This function returns true if insn satisfies dispatch rules on the
35319 last window scheduled. */
35322 fits_dispatch_window (rtx insn)
35324 dispatch_windows *window_list = dispatch_window_list;
35325 dispatch_windows *window_list_next = dispatch_window_list->next;
35326 unsigned int num_restrict;
35327 enum dispatch_group group = get_insn_group (insn);
35328 enum insn_path path = get_insn_path (insn);
/* Make disp_cmp and disp_jcc get scheduled at the latest.  These
   instructions should be given the lowest priority in the
   scheduling process in the Haifa scheduler to make sure they will be
   scheduled in the same dispatch window as the reference to them.  */
35335 if (group == disp_jcc || group == disp_cmp)
35338 /* Check nonrestricted. */
35339 if (group == disp_no_group || group == disp_branch)
35342 /* Get last dispatch window. */
35343 if (window_list_next)
35344 window_list = window_list_next;
35346 if (window_list->window_num == 1)
35348 sum = window_list->prev->window_size + window_list->window_size;
35351 || (min_insn_size (insn) + sum) >= 48)
35352 /* Window 1 is full. Go for next window. */
35356 num_restrict = count_num_restricted (insn, window_list);
35358 if (num_restrict > num_allowable_groups[group])
35361 /* See if it fits in the first window. */
35362 if (window_list->window_num == 0)
/* The first window should have only single and double path
   uops.  */
35366 if (path == path_double
35367 && (window_list->num_uops + 2) > MAX_INSN)
35369 else if (path != path_single)
35375 /* Add an instruction INSN with NUM_UOPS micro-operations to the
35376 dispatch window WINDOW_LIST. */
35379 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
35381 int byte_len = min_insn_size (insn);
35382 int num_insn = window_list->num_insn;
35384 sched_insn_info *window = window_list->window;
35385 enum dispatch_group group = get_insn_group (insn);
35386 enum insn_path path = get_insn_path (insn);
35387 int num_imm_operand;
35388 int num_imm32_operand;
35389 int num_imm64_operand;
35391 if (!window_list->violation && group != disp_cmp
35392 && !fits_dispatch_window (insn))
35393 window_list->violation = true;
35395 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
35396 &num_imm64_operand);
35398 /* Initialize window with new instruction. */
35399 window[num_insn].insn = insn;
35400 window[num_insn].byte_len = byte_len;
35401 window[num_insn].group = group;
35402 window[num_insn].path = path;
35403 window[num_insn].imm_bytes = imm_size;
35405 window_list->window_size += byte_len;
35406 window_list->num_insn = num_insn + 1;
35407 window_list->num_uops = window_list->num_uops + num_uops;
35408 window_list->imm_size += imm_size;
35409 window_list->num_imm += num_imm_operand;
35410 window_list->num_imm_32 += num_imm32_operand;
35411 window_list->num_imm_64 += num_imm64_operand;
35413 if (group == disp_store)
35414 window_list->num_stores += 1;
35415 else if (group == disp_load
35416 || group == disp_prefetch)
35417 window_list->num_loads += 1;
35418 else if (group == disp_load_store)
35420 window_list->num_stores += 1;
35421 window_list->num_loads += 1;
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed the allowable limits, it allocates a new window.  */
35430 add_to_dispatch_window (rtx insn)
35433 dispatch_windows *window_list;
35434 dispatch_windows *next_list;
35435 dispatch_windows *window0_list;
35436 enum insn_path path;
35437 enum dispatch_group insn_group;
35445 if (INSN_CODE (insn) < 0)
35448 byte_len = min_insn_size (insn);
35449 window_list = dispatch_window_list;
35450 next_list = window_list->next;
35451 path = get_insn_path (insn);
35452 insn_group = get_insn_group (insn);
35454 /* Get the last dispatch window. */
35456 window_list = dispatch_window_list->next;
35458 if (path == path_single)
35460 else if (path == path_double)
35463 insn_num_uops = (int) path;
/* If the current window is full, get a new window.
   Window number zero is full if MAX_INSN uops are scheduled in it.
   Window number one is full if window zero's bytes plus window
   one's bytes reach 32, if adding the bytes of the new instruction
   makes the total greater than 48, or if it already has MAX_INSN
   instructions in it.  */
35471 num_insn = window_list->num_insn;
35472 num_uops = window_list->num_uops;
35473 window_num = window_list->window_num;
35474 insn_fits = fits_dispatch_window (insn);
35476 if (num_insn >= MAX_INSN
35477 || num_uops + insn_num_uops > MAX_INSN
35480 window_num = ~window_num & 1;
35481 window_list = allocate_next_window (window_num);
35484 if (window_num == 0)
35486 add_insn_window (insn, window_list, insn_num_uops);
35487 if (window_list->num_insn >= MAX_INSN
35488 && insn_group == disp_branch)
35490 process_end_window ();
35494 else if (window_num == 1)
35496 window0_list = window_list->prev;
35497 sum = window0_list->window_size + window_list->window_size;
35499 || (byte_len + sum) >= 48)
35501 process_end_window ();
35502 window_list = dispatch_window_list;
35505 add_insn_window (insn, window_list, insn_num_uops);
35508 gcc_unreachable ();
35510 if (is_end_basic_block (insn_group))
/* End of basic block is reached; do the end-of-basic-block processing.  */
35513 process_end_window ();
35518 /* Print the dispatch window, WINDOW_NUM, to FILE. */
35520 DEBUG_FUNCTION static void
35521 debug_dispatch_window_file (FILE *file, int window_num)
35523 dispatch_windows *list;
35526 if (window_num == 0)
35527 list = dispatch_window_list;
35529 list = dispatch_window_list1;
35531 fprintf (file, "Window #%d:\n", list->window_num);
35532 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
35533 list->num_insn, list->num_uops, list->window_size);
35534 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
35535 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
35537 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
35539 fprintf (file, " insn info:\n");
35541 for (i = 0; i < MAX_INSN; i++)
35543 if (!list->window[i].insn)
35545 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
35546 i, group_name[list->window[i].group],
35547 i, (void *)list->window[i].insn,
35548 i, list->window[i].path,
35549 i, list->window[i].byte_len,
35550 i, list->window[i].imm_bytes);
35554 /* Print to stdout a dispatch window. */
35556 DEBUG_FUNCTION void
35557 debug_dispatch_window (int window_num)
35559 debug_dispatch_window_file (stdout, window_num);
35562 /* Print INSN dispatch information to FILE. */
35564 DEBUG_FUNCTION static void
35565 debug_insn_dispatch_info_file (FILE *file, rtx insn)
35568 enum insn_path path;
35569 enum dispatch_group group;
35571 int num_imm_operand;
35572 int num_imm32_operand;
35573 int num_imm64_operand;
35575 if (INSN_CODE (insn) < 0)
35578 byte_len = min_insn_size (insn);
35579 path = get_insn_path (insn);
35580 group = get_insn_group (insn);
35581 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
35582 &num_imm64_operand);
35584 fprintf (file, " insn info:\n");
35585 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
35586 group_name[group], path, byte_len);
35587 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
35588 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
/* Print to stdout the status of the ready list with respect to
35592 dispatch windows. */
35594 DEBUG_FUNCTION void
35595 debug_ready_dispatch (void)
35598 int no_ready = number_in_ready ();
35600 fprintf (stdout, "Number of ready: %d\n", no_ready);
35602 for (i = 0; i < no_ready; i++)
35603 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
35606 /* This routine is the driver of the dispatch scheduler. */
35609 do_dispatch (rtx insn, int mode)
35611 if (mode == DISPATCH_INIT)
35612 init_dispatch_sched ();
35613 else if (mode == ADD_TO_DISPATCH_WINDOW)
35614 add_to_dispatch_window (insn);
35617 /* Return TRUE if Dispatch Scheduling is supported. */
35620 has_dispatch (rtx insn, int action)
35622 if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
35623 && flag_dispatch_scheduler)
35629 case IS_DISPATCH_ON:
35634 return is_cmp (insn);
35636 case DISPATCH_VIOLATION:
35637 return dispatch_violation ();
35639 case FITS_DISPATCH_WINDOW:
35640 return fits_dispatch_window (insn);
35646 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
35647 place emms and femms instructions. */
35649 static enum machine_mode
35650 ix86_preferred_simd_mode (enum machine_mode mode)
35658 return TARGET_AVX2 ? V32QImode : V16QImode;
35660 return TARGET_AVX2 ? V16HImode : V8HImode;
35662 return TARGET_AVX2 ? V8SImode : V4SImode;
35664 return TARGET_AVX2 ? V4DImode : V2DImode;
35667 if (TARGET_AVX && !TARGET_PREFER_AVX128)
35673 if (!TARGET_VECTORIZE_DOUBLE)
35675 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
35677 else if (TARGET_SSE2)
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */
35689 static unsigned int
35690 ix86_autovectorize_vector_sizes (void)
35692 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
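/* The return value is a bitmask of vector sizes in bytes: 32 | 16 == 48
   tells the vectorizer to try 256-bit vectors first and to fall back to
   128-bit vectors, while 0 means only the preferred SIMD mode is
   tried.  */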
35695 /* Initialize the GCC target structure. */
35696 #undef TARGET_RETURN_IN_MEMORY
35697 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
35699 #undef TARGET_LEGITIMIZE_ADDRESS
35700 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
35702 #undef TARGET_ATTRIBUTE_TABLE
35703 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
35704 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35705 # undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
35709 #undef TARGET_COMP_TYPE_ATTRIBUTES
35710 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
35712 #undef TARGET_INIT_BUILTINS
35713 #define TARGET_INIT_BUILTINS ix86_init_builtins
35714 #undef TARGET_BUILTIN_DECL
35715 #define TARGET_BUILTIN_DECL ix86_builtin_decl
35716 #undef TARGET_EXPAND_BUILTIN
35717 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
35719 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
35720 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
35721 ix86_builtin_vectorized_function
35723 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
35724 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
35726 #undef TARGET_BUILTIN_RECIPROCAL
35727 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
35729 #undef TARGET_ASM_FUNCTION_EPILOGUE
35730 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
35732 #undef TARGET_ENCODE_SECTION_INFO
35733 #ifndef SUBTARGET_ENCODE_SECTION_INFO
35734 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
35739 #undef TARGET_ASM_OPEN_PAREN
35740 #define TARGET_ASM_OPEN_PAREN ""
35741 #undef TARGET_ASM_CLOSE_PAREN
35742 #define TARGET_ASM_CLOSE_PAREN ""
35744 #undef TARGET_ASM_BYTE_OP
35745 #define TARGET_ASM_BYTE_OP ASM_BYTE
35747 #undef TARGET_ASM_ALIGNED_HI_OP
35748 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
35749 #undef TARGET_ASM_ALIGNED_SI_OP
35750 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
35752 #undef TARGET_ASM_ALIGNED_DI_OP
35753 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
35756 #undef TARGET_PROFILE_BEFORE_PROLOGUE
35757 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
35759 #undef TARGET_ASM_UNALIGNED_HI_OP
35760 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
35761 #undef TARGET_ASM_UNALIGNED_SI_OP
35762 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
35763 #undef TARGET_ASM_UNALIGNED_DI_OP
35764 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
35766 #undef TARGET_PRINT_OPERAND
35767 #define TARGET_PRINT_OPERAND ix86_print_operand
35768 #undef TARGET_PRINT_OPERAND_ADDRESS
35769 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
35770 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
35771 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
35772 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
35773 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
35775 #undef TARGET_SCHED_INIT_GLOBAL
35776 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
35777 #undef TARGET_SCHED_ADJUST_COST
35778 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
35779 #undef TARGET_SCHED_ISSUE_RATE
35780 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
35781 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
35782 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
35783 ia32_multipass_dfa_lookahead
35785 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
35786 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
35789 #undef TARGET_HAVE_TLS
35790 #define TARGET_HAVE_TLS true
35792 #undef TARGET_CANNOT_FORCE_CONST_MEM
35793 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
35794 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
35795 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
35797 #undef TARGET_DELEGITIMIZE_ADDRESS
35798 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
35800 #undef TARGET_MS_BITFIELD_LAYOUT_P
35801 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
35807 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35808 #undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

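/* va_list handling.  The layout of va_list differs between the
   32-bit, 64-bit SysV and 64-bit MS ABIs, so building, classifying
   and expanding va_list types all need target help.  */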
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

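/* Argument passing and function return conventions.  */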
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#ifndef TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

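/* Register class and reload hooks.  */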
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

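/* Auto-vectorizer hooks: cost model, permutation builtins, and the
   set of vector sizes to try.  */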
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

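/* Function-specific option support for the "target" attribute:
   validate, save, restore and print per-function option settings,
   and decide whether functions built with different settings may
   be inlined into each other.  */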
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

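/* Address and constant legitimacy checks, frame and register-usage
   hooks.  */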
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

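/* Build the complete target hook vector.  Hooks not overridden by a
   TARGET_* definition above keep the defaults supplied by
   TARGET_INITIALIZER in target-def.h.  */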
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"