/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B)	((block_info) (B)->aux)

enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};

/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
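
/* check_avx256_stores is used below as a note_stores callback:
   note_stores invokes it once per store in an insn pattern, so a SET
   whose destination (or whose register source) has a 256bit vector
   mode, e.g. (set (reg:V8SF xmm0) ...), flips *STATE (passed through
   DATA) to "used".  Only stores are inspected here; that is enough
   for this pass to know the upper halves may be dirty.  */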
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}
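
/* In short, the scan above is a small per-insn state machine:

     vzeroupper seen while STATE == unused  -> deleted as redundant (the
					       callee-return case merely
					       flips STATE back to used)
     vzeroupper seen otherwise		    -> deleted if the callee is
					       passed a 256bit register,
					       else kept pending and sunk
					       to just before the next
					       jump/call
     vzeroall				    -> STATE = unused
     store of a 256bit AVX register	    -> STATE = used

   The pending vzeroupper is sunk to the jump/call boundary because, as
   the comment above says, that is where it must take effect to avoid
   the AVX/SSE transition penalty in the callee or successor block.  */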
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as USED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    switch (BLOCK_INFO (e->src)->state)
      {
      case unknown:
	if (!unknown_is_unused)
	  seen_unknown = true;
	break;
      case unused:
	break;
      case used:
	state = used;
	break;
      }

  if (seen_unknown)
    state = unknown;

  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
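
/* The predecessor walk above is effectively a meet over a three-value
   lattice: "used" in any predecessor forces "used" at entry; otherwise
   any "unknown" predecessor leaves the entry state "unknown" (unless
   UNKNOWN_IS_UNUSED, used by the final cleanup pass below, demotes it
   to "unused"); only all-"unused" predecessors yield "unused".  */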
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      edge_iterator ei;

	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
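
/* The loops above form the classic two-worklist dataflow scheme
   (compare the solver in df-core.c): blocks are keyed by reverse
   completion order so predecessors tend to come off the heap first;
   when a block's exit state changes, each unprocessed successor goes
   into the current round (WORKLIST) if not yet visited, otherwise into
   the next round (PENDING); and the outer loop stops as soon as a
   round completes without promoting any block to "used"
   (rescan_vzeroupper_p stays 0).  */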
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)				\
  ((mode) == QImode ? 0					\
   : (mode) == HImode ? 1				\
   : (mode) == SImode ? 2				\
   : (mode) == DImode ? 3				\
   : 4)
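
/* For example, MODE_INDEX (SImode) is 2, so cost lookups such as
   ix86_cost->mult_init[MODE_INDEX (mode)] pick the SImode entry of the
   5-element arrays in the tables below; the 5th slot (index 4) is the
   catch-all "other" entry for any remaining mode.  */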
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
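
/* With COSTS_N_INSNS (N) defined as (N) * 4, an add costs
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1): byte counts and insn
   counts deliberately share one scale, so the size-tuned table below
   can be consumed by the same code as the speed-tuned ones.  */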
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
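
/* Each stringop_algs value below describes one memcpy/memset strategy
   table: a fallback algorithm for unknown sizes, then {max, alg} pairs
   where ALG handles blocks up to MAX bytes and -1 means "no upper
   bound" (see struct stringop_algs in i386.h).  Each cost table
   carries two such values per operation, one for 32bit and one for
   64bit code; DUMMY_STRINGOP_ALGS fills 64bit slots that are never
   tuned because the processor predates 64bit mode.  */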
static const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/* HI */
   COSTS_N_INSNS (6),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (23),			/* SI */
   COSTS_N_INSNS (23),			/* DI */
   COSTS_N_INSNS (23)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/* HI */
   COSTS_N_INSNS (12),			/* SI */
   COSTS_N_INSNS (12),			/* DI */
   COSTS_N_INSNS (12)},			/* other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/* HI */
   COSTS_N_INSNS (40),			/* SI */
   COSTS_N_INSNS (40),			/* DI */
   COSTS_N_INSNS (40)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* HI */
   COSTS_N_INSNS (11),			/* SI */
   COSTS_N_INSNS (11),			/* DI */
   COSTS_N_INSNS (11)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* HI */
   COSTS_N_INSNS (25),			/* SI */
   COSTS_N_INSNS (25),			/* DI */
   COSTS_N_INSNS (25)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (4)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/* HI */
   COSTS_N_INSNS (17),			/* SI */
   COSTS_N_INSNS (17),			/* DI */
   COSTS_N_INSNS (17)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in
     the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (7),			/* SI */
   COSTS_N_INSNS (7),			/* DI */
   COSTS_N_INSNS (7)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (39),			/* SI */
   COSTS_N_INSNS (39),			/* DI */
   COSTS_N_INSNS (39)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/* HI */
   COSTS_N_INSNS (18),			/* SI */
   COSTS_N_INSNS (18),			/* DI */
   COSTS_N_INSNS (18)},			/* other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/* HI */
   COSTS_N_INSNS (5),			/* SI */
   COSTS_N_INSNS (5),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium sized blocks, but for
     very small blocks an inline loop is better.  For large blocks, a
     libcall can use nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium sized blocks, but
     for very small blocks an inline loop is better.  For large blocks, a
     libcall can use nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  11,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  /* BDVER1 has an optimized REP instruction for medium sized blocks, but
     for very small blocks an inline loop is better.  For large blocks, a
     libcall can use nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  11,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  /* BDVER2 has an optimized REP instruction for medium sized blocks, but
     for very small blocks an inline loop is better.  For large blocks, a
     libcall can use nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  32,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* BTVER1 has an optimized REP instruction for medium sized blocks, but
     for very small blocks an inline loop is better.  For large blocks, a
     libcall can use nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/* HI */
   COSTS_N_INSNS (15),			/* SI */
   COSTS_N_INSNS (15),			/* DI */
   COSTS_N_INSNS (15)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/* HI */
   COSTS_N_INSNS (56),			/* SI */
   COSTS_N_INSNS (56),			/* DI */
   COSTS_N_INSNS (56)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/* HI */
   COSTS_N_INSNS (10),			/* SI */
   COSTS_N_INSNS (10),			/* DI */
   COSTS_N_INSNS (10)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/* HI */
   COSTS_N_INSNS (66),			/* SI */
   COSTS_N_INSNS (66),			/* DI */
   COSTS_N_INSNS (66)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  1024,					/* size of l2 cache.  */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
1655 struct processor_costs atom_cost = {
1656 COSTS_N_INSNS (1), /* cost of an add instruction */
1657 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1658 COSTS_N_INSNS (1), /* variable shift costs */
1659 COSTS_N_INSNS (1), /* constant shift costs */
1660 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1661 COSTS_N_INSNS (4), /* HI */
1662 COSTS_N_INSNS (3), /* SI */
1663 COSTS_N_INSNS (4), /* DI */
1664 COSTS_N_INSNS (2)}, /* other */
1665 0, /* cost of multiply per each bit set */
1666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1667 COSTS_N_INSNS (26), /* HI */
1668 COSTS_N_INSNS (42), /* SI */
1669 COSTS_N_INSNS (74), /* DI */
1670 COSTS_N_INSNS (74)}, /* other */
1671 COSTS_N_INSNS (1), /* cost of movsx */
1672 COSTS_N_INSNS (1), /* cost of movzx */
1673 8, /* "large" insn */
1674 17, /* MOVE_RATIO */
1675 4, /* cost for loading QImode using movzbl */
1676 {4, 4, 4}, /* cost of loading integer registers
1677 in QImode, HImode and SImode.
1678 Relative to reg-reg move (2). */
1679 {4, 4, 4}, /* cost of storing integer registers */
1680 4, /* cost of reg,reg fld/fst */
1681 {12, 12, 12}, /* cost of loading fp registers
1682 in SFmode, DFmode and XFmode */
1683 {6, 6, 8}, /* cost of storing fp registers
1684 in SFmode, DFmode and XFmode */
1685 2, /* cost of moving MMX register */
1686 {8, 8}, /* cost of loading MMX registers
1687 in SImode and DImode */
1688 {8, 8}, /* cost of storing MMX registers
1689 in SImode and DImode */
1690 2, /* cost of moving SSE register */
1691 {8, 8, 8}, /* cost of loading SSE registers
1692 in SImode, DImode and TImode */
1693 {8, 8, 8}, /* cost of storing SSE registers
1694 in SImode, DImode and TImode */
1695 5, /* MMX or SSE register to integer */
1696 32, /* size of l1 cache. */
1697 256, /* size of l2 cache. */
1698 64, /* size of prefetch block */
1699 6, /* number of parallel prefetches */
1700 3, /* Branch cost */
1701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1702 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1703 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1704 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1705 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1706 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1707 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1708 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1709 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1710 {{libcall, {{8, loop}, {15, unrolled_loop},
1711 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1712 {libcall, {{24, loop}, {32, unrolled_loop},
1713 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1714 1, /* scalar_stmt_cost. */
1715 1, /* scalar load_cost. */
1716 1, /* scalar_store_cost. */
1717 1, /* vec_stmt_cost. */
1718 1, /* vec_to_scalar_cost. */
1719 1, /* scalar_to_vec_cost. */
1720 1, /* vec_align_load_cost. */
1721 2, /* vec_unalign_load_cost. */
1722 1, /* vec_store_cost. */
1723 3, /* cond_taken_branch_cost. */
1724 1, /* cond_not_taken_branch_cost. */
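/* Orientation note (editorial): values wrapped in COSTS_N_INSNS are in
   the middle end's instruction-cost units (rtl.h defines
   COSTS_N_INSNS (N) as (N) * 4), while the bare integers, such as the
   register move costs, are relative to a reg-reg move cost of 2.  A
   hypothetical consumer comparing an SImode multiply against a
   shift-and-add sequence might look like the sketch below; the field
   names mult_init, shift_const and add are assumed from the
   processor_costs declaration in i386.h.  */
#if 0 /* illustration only */
static bool
example_prefer_shift_over_mult (const struct processor_costs *c)
{
  /* One SImode multiply vs. two constant shifts plus an add.  */
  return c->mult_init[2] > 2 * c->shift_const + c->add;
}
#endif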
1727 /* Generic64 should produce code tuned for Nocona and K8. */
1729 struct processor_costs generic64_cost = {
1730 COSTS_N_INSNS (1), /* cost of an add instruction */
1731   /* On all chips taken into consideration lea is 2 cycles or more.  With
1732      this cost, however, our current implementation of synth_mult results in
1733      the use of unnecessary temporary registers, causing regressions on several
1734      SPECfp benchmarks.  */
1735 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1736 COSTS_N_INSNS (1), /* variable shift costs */
1737 COSTS_N_INSNS (1), /* constant shift costs */
1738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1739 COSTS_N_INSNS (4), /* HI */
1740 COSTS_N_INSNS (3), /* SI */
1741 COSTS_N_INSNS (4), /* DI */
1742 COSTS_N_INSNS (2)}, /* other */
1743 0, /* cost of multiply per each bit set */
1744 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1745 COSTS_N_INSNS (26), /* HI */
1746 COSTS_N_INSNS (42), /* SI */
1747 COSTS_N_INSNS (74), /* DI */
1748 COSTS_N_INSNS (74)}, /* other */
1749 COSTS_N_INSNS (1), /* cost of movsx */
1750 COSTS_N_INSNS (1), /* cost of movzx */
1751 8, /* "large" insn */
1752 17, /* MOVE_RATIO */
1753 4, /* cost for loading QImode using movzbl */
1754 {4, 4, 4}, /* cost of loading integer registers
1755 in QImode, HImode and SImode.
1756 Relative to reg-reg move (2). */
1757 {4, 4, 4}, /* cost of storing integer registers */
1758 4, /* cost of reg,reg fld/fst */
1759 {12, 12, 12}, /* cost of loading fp registers
1760 in SFmode, DFmode and XFmode */
1761 {6, 6, 8}, /* cost of storing fp registers
1762 in SFmode, DFmode and XFmode */
1763 2, /* cost of moving MMX register */
1764 {8, 8}, /* cost of loading MMX registers
1765 in SImode and DImode */
1766 {8, 8}, /* cost of storing MMX registers
1767 in SImode and DImode */
1768 2, /* cost of moving SSE register */
1769 {8, 8, 8}, /* cost of loading SSE registers
1770 in SImode, DImode and TImode */
1771 {8, 8, 8}, /* cost of storing SSE registers
1772 in SImode, DImode and TImode */
1773 5, /* MMX or SSE register to integer */
1774 32, /* size of l1 cache. */
1775 512, /* size of l2 cache. */
1776 64, /* size of prefetch block */
1777 6, /* number of parallel prefetches */
1778   /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1779      value is increased to the perhaps more appropriate value of 5.  */
1780 3, /* Branch cost */
1781 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1782 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1783 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1784 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1785 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1786 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1787 {DUMMY_STRINGOP_ALGS,
1788 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1789 {DUMMY_STRINGOP_ALGS,
1790 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1791 1, /* scalar_stmt_cost. */
1792 1, /* scalar load_cost. */
1793 1, /* scalar_store_cost. */
1794 1, /* vec_stmt_cost. */
1795 1, /* vec_to_scalar_cost. */
1796 1, /* scalar_to_vec_cost. */
1797 1, /* vec_align_load_cost. */
1798 2, /* vec_unalign_load_cost. */
1799 1, /* vec_store_cost. */
1800 3, /* cond_taken_branch_cost. */
1801 1, /* cond_not_taken_branch_cost. */
1804 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1805    Core 2, and K8.  */
1807 struct processor_costs generic32_cost = {
1808 COSTS_N_INSNS (1), /* cost of an add instruction */
1809 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1810 COSTS_N_INSNS (1), /* variable shift costs */
1811 COSTS_N_INSNS (1), /* constant shift costs */
1812 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1813 COSTS_N_INSNS (4), /* HI */
1814 COSTS_N_INSNS (3), /* SI */
1815 COSTS_N_INSNS (4), /* DI */
1816 COSTS_N_INSNS (2)}, /* other */
1817 0, /* cost of multiply per each bit set */
1818 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1819 COSTS_N_INSNS (26), /* HI */
1820 COSTS_N_INSNS (42), /* SI */
1821 COSTS_N_INSNS (74), /* DI */
1822 COSTS_N_INSNS (74)}, /* other */
1823 COSTS_N_INSNS (1), /* cost of movsx */
1824 COSTS_N_INSNS (1), /* cost of movzx */
1825 8, /* "large" insn */
1826 17, /* MOVE_RATIO */
1827 4, /* cost for loading QImode using movzbl */
1828 {4, 4, 4}, /* cost of loading integer registers
1829 in QImode, HImode and SImode.
1830 Relative to reg-reg move (2). */
1831 {4, 4, 4}, /* cost of storing integer registers */
1832 4, /* cost of reg,reg fld/fst */
1833 {12, 12, 12}, /* cost of loading fp registers
1834 in SFmode, DFmode and XFmode */
1835 {6, 6, 8}, /* cost of storing fp registers
1836 in SFmode, DFmode and XFmode */
1837 2, /* cost of moving MMX register */
1838 {8, 8}, /* cost of loading MMX registers
1839 in SImode and DImode */
1840 {8, 8}, /* cost of storing MMX registers
1841 in SImode and DImode */
1842 2, /* cost of moving SSE register */
1843 {8, 8, 8}, /* cost of loading SSE registers
1844 in SImode, DImode and TImode */
1845 {8, 8, 8}, /* cost of storing SSE registers
1846 in SImode, DImode and TImode */
1847 5, /* MMX or SSE register to integer */
1848 32, /* size of l1 cache. */
1849 256, /* size of l2 cache. */
1850 64, /* size of prefetch block */
1851 6, /* number of parallel prefetches */
1852 3, /* Branch cost */
1853 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1854 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1855 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1856 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1857 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1858 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1859 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1860 DUMMY_STRINGOP_ALGS},
1861 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1862 DUMMY_STRINGOP_ALGS},
1863 1, /* scalar_stmt_cost. */
1864 1, /* scalar load_cost. */
1865 1, /* scalar_store_cost. */
1866 1, /* vec_stmt_cost. */
1867 1, /* vec_to_scalar_cost. */
1868 1, /* scalar_to_vec_cost. */
1869 1, /* vec_align_load_cost. */
1870 2, /* vec_unalign_load_cost. */
1871 1, /* vec_store_cost. */
1872 3, /* cond_taken_branch_cost. */
1873 1, /* cond_not_taken_branch_cost. */
1876 const struct processor_costs *ix86_cost = &pentium_cost;
1878 /* Processor feature/optimization bitmasks. */
1879 #define m_386 (1<<PROCESSOR_I386)
1880 #define m_486 (1<<PROCESSOR_I486)
1881 #define m_PENT (1<<PROCESSOR_PENTIUM)
1882 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1883 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1884 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1885 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1886 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1887 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1888 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1889 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1890 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1891 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1892 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1893 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1894 #define m_ATOM (1<<PROCESSOR_ATOM)
1896 #define m_GEODE (1<<PROCESSOR_GEODE)
1897 #define m_K6 (1<<PROCESSOR_K6)
1898 #define m_K6_GEODE (m_K6 | m_GEODE)
1899 #define m_K8 (1<<PROCESSOR_K8)
1900 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1903 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1904 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1905 #define m_BDVER (m_BDVER1 | m_BDVER2)
1906 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1907 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1909 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1910 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1912 /* Generic instruction choice should be a common subset of the supported
1913    CPUs (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1914 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
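/* Editorial sketch: each m_* mask carries one bit per PROCESSOR_*
   value, so deciding whether a tuning feature applies to the active
   processor is a single AND against 1 << processor, exactly the
   computation used to fill ix86_tune_features further down.  */
#if 0 /* illustration only */
static bool
example_feature_enabled_p (unsigned int feature_mask,
                           enum processor_type tune)
{
  unsigned int tune_mask = 1u << tune; /* e.g. 1 << PROCESSOR_ATOM == m_ATOM */
  return (feature_mask & tune_mask) != 0;
}
#endif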
1916 /* Feature tests against the various tunings. */
1917 unsigned char ix86_tune_features[X86_TUNE_LAST];
1919 /* Feature tests against the various tunings used to create ix86_tune_features
1920 based on the processor mask. */
1921 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1922   /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1923      negatively, so enabling it for Generic64 seems like a good code-size
1924      tradeoff.  We can't enable it for 32bit generic because it does not
1925      work well with PPro-based chips.  */
1926 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1928 /* X86_TUNE_PUSH_MEMORY */
1929 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1931 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1934 /* X86_TUNE_UNROLL_STRLEN */
1935 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1937   /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1938      on simulation results, but after P4 was made no performance benefit
1939      was observed with branch hints; they also increase code size.
1940      As a result, icc never generates branch hints.  */
1943 /* X86_TUNE_DOUBLE_WITH_ADD */
1946 /* X86_TUNE_USE_SAHF */
1947 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1949   /* X86_TUNE_MOVX: Enable zero-extending of integer registers to avoid
1950      partial dependencies.  */
1951 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1953   /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1954      register stalls on the Generic32 compilation setting as well.  However,
1955      in the current implementation the partial register stalls are not
1956      eliminated very well - they can be introduced via subregs synthesized
1957      by combine and can happen in caller/callee saving sequences.  Because
1958      this option pays back little on PPro-based chips and conflicts with the
1959      partial register dependencies used by Athlon/P4-based chips, it is
1960      better to leave it off for generic32 for now.  */
1963 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1964 m_CORE2I7 | m_GENERIC,
1966 /* X86_TUNE_USE_HIMODE_FIOP */
1967 m_386 | m_486 | m_K6_GEODE,
1969 /* X86_TUNE_USE_SIMODE_FIOP */
1970 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1972 /* X86_TUNE_USE_MOV0 */
1975 /* X86_TUNE_USE_CLTD */
1976 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1978 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1981 /* X86_TUNE_SPLIT_LONG_MOVES */
1984 /* X86_TUNE_READ_MODIFY_WRITE */
1987 /* X86_TUNE_READ_MODIFY */
1990 /* X86_TUNE_PROMOTE_QIMODE */
1991 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1993 /* X86_TUNE_FAST_PREFIX */
1994 ~(m_386 | m_486 | m_PENT),
1996 /* X86_TUNE_SINGLE_STRINGOP */
1997 m_386 | m_P4_NOCONA,
1999 /* X86_TUNE_QIMODE_MATH */
2002   /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2003      register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
2004      might be considered for Generic32 if our scheme for avoiding partial
2005      stalls were more effective.  */
2008 /* X86_TUNE_PROMOTE_QI_REGS */
2011 /* X86_TUNE_PROMOTE_HI_REGS */
2014 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2015 over esp addition. */
2016 m_386 | m_486 | m_PENT | m_PPRO,
2018 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2019 over esp addition. */
2022 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2023 over esp subtraction. */
2024 m_386 | m_486 | m_PENT | m_K6_GEODE,
2026   /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
2027      over esp subtraction.  */
2028 m_PENT | m_K6_GEODE,
2030 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2031 for DFmode copies */
2032   ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2034 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2035 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2037   /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2038      conflict here between PPro/Pentium4-based chips that treat 128bit
2039      SSE registers as single units and K8-based chips that divide SSE
2040      registers into two 64bit halves.  This knob promotes all store
2041      destinations to be 128bit to allow register renaming on 128bit SSE
2042      units, but usually results in one extra microop on 64bit SSE units.
2043      Experimental results show that disabling this option on P4 brings over
2044      20% SPECfp regression, while enabling it on K8 brings roughly 2.4%
2045      regression that can be partly masked by careful scheduling of moves.  */
2046 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2048 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2049 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2051 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2054 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2057   /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2058      are resolved on SSE register parts instead of whole registers, so we may
2059      maintain just the lower part of scalar values in the proper format,
2060      leaving the upper part undefined.  */
2063 /* X86_TUNE_SSE_TYPELESS_STORES */
2066 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2067 m_PPRO | m_P4_NOCONA,
2069 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2070 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2072 /* X86_TUNE_PROLOGUE_USING_MOVE */
2073 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2075 /* X86_TUNE_EPILOGUE_USING_MOVE */
2076 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2078 /* X86_TUNE_SHIFT1 */
2081 /* X86_TUNE_USE_FFREEP */
2084 /* X86_TUNE_INTER_UNIT_MOVES */
2085 ~(m_AMD_MULTIPLE | m_GENERIC),
2087 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2088   ~(m_AMDFAM10 | m_BDVER),
2090 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2091 than 4 branch instructions in the 16 byte window. */
2092 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2094 /* X86_TUNE_SCHEDULE */
2095 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2097 /* X86_TUNE_USE_BT */
2098 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2100 /* X86_TUNE_USE_INCDEC */
2101 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2103 /* X86_TUNE_PAD_RETURNS */
2104 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2106   /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions.  */
2109 /* X86_TUNE_EXT_80387_CONSTANTS */
2110 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2112 /* X86_TUNE_SHORTEN_X87_SSE */
2115 /* X86_TUNE_AVOID_VECTOR_DECODE */
2116 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2118   /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2119      and SImode multiplies, but the 386 and 486 do HImode multiplies faster.  */
2122   /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory takes
2123      the vector path on AMD machines.  */
2124 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2126   /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant takes the vector path
2127      on AMD machines.  */
2128 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2130   /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2131      than a MOV.  */
2134 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2135 but one byte longer. */
2138   /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
2139      operand that cannot be represented using a modRM byte.  The XOR
2140      replacement is long decoded, so this split helps here as well.  */
2143   /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2144      from FP to FP.  */
2145 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2147 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2148 from integer to FP. */
2151 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2152 with a subsequent conditional jump instruction into a single
2153 compare-and-branch uop. */
2156 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2157 will impact LEA instruction selection. */
2160   /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2161      instructions.  */
2164   /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2165      at -O3.  For the moment, the prefetching seems badly tuned for Intel
2166      chips.  */
2167 m_K6_GEODE | m_AMD_MULTIPLE,
2169 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2170 the auto-vectorizer. */
2173 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2174 during reassociation of integer computation. */
2177 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2178 during reassociation of fp computation. */
2182 /* Feature tests against the various architecture variations. */
2183 unsigned char ix86_arch_features[X86_ARCH_LAST];
2185 /* Feature tests against the various architecture variations, used to create
2186 ix86_arch_features based on the processor mask. */
2187 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2188 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2189 ~(m_386 | m_486 | m_PENT | m_K6),
2191 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2194 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2197 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2200 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2204 static const unsigned int x86_accumulate_outgoing_args
2205 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2207 static const unsigned int x86_arch_always_fancy_math_387
2208 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2210 static const unsigned int x86_avx256_split_unaligned_load
2211 = m_COREI7 | m_GENERIC;
2213 static const unsigned int x86_avx256_split_unaligned_store
2214 = m_COREI7 | m_BDVER | m_GENERIC;
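/* Editorial sketch (an assumption about how the two masks above are
   consumed): during option override the active tuning bit is tested
   against each mask and the matching target flag is enabled unless
   the user set it explicitly, roughly:  */
#if 0 /* illustration only */
  if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
#endif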
2216 /* In case the average insn count for a single function invocation is
2217    lower than this constant, emit a fast (but longer) prologue and
2218    epilogue.  */
2219 #define FAST_PROLOGUE_INSN_COUNT 20
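/* Editorial sketch: the constant above feeds a simple threshold test;
   the real decision is made in ix86_compute_frame_layout from the
   function's insn count.  A hypothetical reduction of that test:  */
#if 0 /* illustration only */
static bool
example_use_fast_prologue_p (int body_insn_count)
{
  return body_insn_count < FAST_PROLOGUE_INSN_COUNT;
}
#endif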
2221 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2222 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2223 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2224 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2226 /* Array of the smallest class containing reg number REGNO, indexed by
2227 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2229 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2231 /* ax, dx, cx, bx */
2232 AREG, DREG, CREG, BREG,
2233 /* si, di, bp, sp */
2234 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2236 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2237 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2240 /* flags, fpsr, fpcr, frame */
2241 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2243 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2246 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2249 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2250 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2251 /* SSE REX registers */
2252 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2256 /* The "default" register map used in 32bit mode. */
2258 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2260 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2261 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2262 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2263 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2264 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2269 /* The "default" register map used in 64bit mode. */
2271 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2273 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2274 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2275 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2276 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2277 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2278   8, 9, 10, 11, 12, 13, 14, 15,	/* extended integer registers */
2279 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2282 /* Define the register numbers to be used in Dwarf debugging information.
2283 The SVR4 reference port C compiler uses the following register numbers
2284 in its Dwarf output code:
2285 0 for %eax (gcc regno = 0)
2286 1 for %ecx (gcc regno = 2)
2287 2 for %edx (gcc regno = 1)
2288 3 for %ebx (gcc regno = 3)
2289 4 for %esp (gcc regno = 7)
2290 5 for %ebp (gcc regno = 6)
2291 6 for %esi (gcc regno = 4)
2292 7 for %edi (gcc regno = 5)
2293 The following three DWARF register numbers are never generated by
2294 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2295 believes these numbers have these meanings.
2296 8 for %eip (no gcc equivalent)
2297 9 for %eflags (gcc regno = 17)
2298 10 for %trapno (no gcc equivalent)
2299 It is not at all clear how we should number the FP stack registers
2300 for the x86 architecture. If the version of SDB on x86/svr4 were
2301 a bit less brain dead with respect to floating-point then we would
2302 have a precedent to follow with respect to DWARF register numbers
2303 for x86 FP registers, but the SDB on x86/svr4 is so completely
2304 broken with respect to FP registers that it is hardly worth thinking
2305 of it as something to strive for compatibility with.
2306 The version of x86/svr4 SDB I have at the moment does (partially)
2307 seem to believe that DWARF register number 11 is associated with
2308 the x86 register %st(0), but that's about all. Higher DWARF
2309 register numbers don't seem to be associated with anything in
2310 particular, and even for DWARF regno 11, SDB only seems to under-
2311 stand that it should say that a variable lives in %st(0) (when
2312 asked via an `=' command) if we said it was in DWARF regno 11,
2313 but SDB still prints garbage when asked for the value of the
2314 variable in question (via a `/' command).
2315 (Also note that the labels SDB prints for various FP stack regs
2316 when doing an `x' command are all wrong.)
2317 Note that these problems generally don't affect the native SVR4
2318 C compiler because it doesn't allow the use of -O with -g and
2319 because when it is *not* optimizing, it allocates a memory
2320 location for each floating-point variable, and the memory
2321 location is what gets described in the DWARF AT_location
2322 attribute for the variable in question.
2323 Regardless of the severe mental illness of the x86/svr4 SDB, we
2324 do something sensible here and we use the following DWARF
2325    register numbers.  Note that these are all stack-top-relative
2326    numbers:
2327 11 for %st(0) (gcc regno = 8)
2328 12 for %st(1) (gcc regno = 9)
2329 13 for %st(2) (gcc regno = 10)
2330 14 for %st(3) (gcc regno = 11)
2331 15 for %st(4) (gcc regno = 12)
2332 16 for %st(5) (gcc regno = 13)
2333 17 for %st(6) (gcc regno = 14)
2334 18 for %st(7) (gcc regno = 15)
2336 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2338 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2339 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2340 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2341 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2342 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2343 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2344 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
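/* Editorial note: these maps are consumed when emitting debug info;
   i386.h is assumed to define, roughly,
     #define DBX_REGISTER_NUMBER(n) \
       (TARGET_64BIT ? dbx64_register_map[n] : dbx_register_map[n])
   so e.g. gcc regno 1 (%edx) is emitted as DWARF regno 2 under the
   SVR4 numbering documented above.  */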
2347 /* Define parameter passing and return registers. */
2349 static int const x86_64_int_parameter_registers[6] =
2351 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2354 static int const x86_64_ms_abi_int_parameter_registers[4] =
2356 CX_REG, DX_REG, R8_REG, R9_REG
2359 static int const x86_64_int_return_registers[4] =
2361 AX_REG, DX_REG, DI_REG, SI_REG
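/* Worked example (editorial, per the x86-64 psABI): a 16-byte struct
   of two longs classifies as {INTEGER, INTEGER} and is returned in
   the first two entries above, AX_REG then DX_REG (i.e. RAX:RDX).  */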
2364 /* Define the structure for the machine field in struct function. */
2366 struct GTY(()) stack_local_entry {
2367 unsigned short mode;
2370 struct stack_local_entry *next;
2373 /* Structure describing stack frame layout.
2374 Stack grows downward:
2380 saved static chain if ix86_static_chain_on_stack
2382 saved frame pointer if frame_pointer_needed
2383 <- HARD_FRAME_POINTER
2389 <- sse_regs_save_offset
2392 [va_arg registers] |
2396 [padding2] | = to_allocate
2405 int outgoing_arguments_size;
2406 HOST_WIDE_INT frame;
2408 /* The offsets relative to ARG_POINTER. */
2409 HOST_WIDE_INT frame_pointer_offset;
2410 HOST_WIDE_INT hard_frame_pointer_offset;
2411 HOST_WIDE_INT stack_pointer_offset;
2412 HOST_WIDE_INT hfp_save_offset;
2413 HOST_WIDE_INT reg_save_offset;
2414 HOST_WIDE_INT sse_reg_save_offset;
2416 /* When save_regs_using_mov is set, emit prologue using
2417 move instead of push instructions. */
2418 bool save_regs_using_mov;
2421 /* Which CPU we are scheduling for.  */
2422 enum attr_cpu ix86_schedule;
2424 /* Which CPU we are optimizing for.  */
2425 enum processor_type ix86_tune;
2427 /* Which instruction set architecture to use. */
2428 enum processor_type ix86_arch;
2430 /* True if the SSE prefetch instruction is not a NOP.  */
2431 int x86_prefetch_sse;
2433 /* -mstackrealign option */
2434 static const char ix86_force_align_arg_pointer_string[]
2435 = "force_align_arg_pointer";
2437 static rtx (*ix86_gen_leave) (void);
2438 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2439 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2440 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2441 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2442 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2443 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2444 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2445 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2446 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2448 /* Preferred alignment for stack boundary in bits. */
2449 unsigned int ix86_preferred_stack_boundary;
2451 /* Alignment for incoming stack boundary in bits specified at
2452    command line.  */
2453 static unsigned int ix86_user_incoming_stack_boundary;
2455 /* Default alignment for incoming stack boundary in bits. */
2456 static unsigned int ix86_default_incoming_stack_boundary;
2458 /* Alignment for incoming stack boundary in bits. */
2459 unsigned int ix86_incoming_stack_boundary;
2461 /* Calling abi specific va_list type nodes. */
2462 static GTY(()) tree sysv_va_list_type_node;
2463 static GTY(()) tree ms_va_list_type_node;
2465 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2466 char internal_label_prefix[16];
2467 int internal_label_prefix_len;
2469 /* Fence to use after loop using movnt. */
2472 /* Register class used for passing a given 64bit part of the argument.
2473    These represent classes as documented by the psABI, with the exception
2474    of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2475    uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
2477    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2478    whenever possible (the upper half does contain padding).  */
2479 enum x86_64_reg_class
2482 X86_64_INTEGER_CLASS,
2483 X86_64_INTEGERSI_CLASS,
2490 X86_64_COMPLEX_X87_CLASS,
2494 #define MAX_CLASSES 4
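/* Worked example (editorial, following the psABI): for
     struct s { double d; long l; };
   the first eightbyte classifies as X86_64_SSE_CLASS and the second
   as X86_64_INTEGER_CLASS, so the argument travels in one SSE and one
   integer register.  MAX_CLASSES is 4 because the largest register-
   passed object, a 256-bit AVX vector, spans four eightbytes (an SSE
   class followed by SSEUP classes).  */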
2496 /* Table of constants used by fldpi, fldln2, etc.  */
2497 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2498 static bool ext_80387_constants_init = 0;
2501 static struct machine_function * ix86_init_machine_status (void);
2502 static rtx ix86_function_value (const_tree, const_tree, bool);
2503 static bool ix86_function_value_regno_p (const unsigned int);
2504 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2506 static rtx ix86_static_chain (const_tree, bool);
2507 static int ix86_function_regparm (const_tree, const_tree);
2508 static void ix86_compute_frame_layout (struct ix86_frame *);
2509 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2511 static void ix86_add_new_builtins (HOST_WIDE_INT);
2512 static tree ix86_canonical_va_list_type (tree);
2513 static void predict_jump (int);
2514 static unsigned int split_stack_prologue_scratch_regno (void);
2515 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2517 enum ix86_function_specific_strings
2519 IX86_FUNCTION_SPECIFIC_ARCH,
2520 IX86_FUNCTION_SPECIFIC_TUNE,
2521 IX86_FUNCTION_SPECIFIC_MAX
2524 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2525 const char *, enum fpmath_unit, bool);
2526 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2527 static void ix86_function_specific_save (struct cl_target_option *);
2528 static void ix86_function_specific_restore (struct cl_target_option *);
2529 static void ix86_function_specific_print (FILE *, int,
2530 struct cl_target_option *);
2531 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2532 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2533 struct gcc_options *);
2534 static bool ix86_can_inline_p (tree, tree);
2535 static void ix86_set_current_function (tree);
2536 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2538 static enum calling_abi ix86_function_abi (const_tree);
2541 #ifndef SUBTARGET32_DEFAULT_CPU
2542 #define SUBTARGET32_DEFAULT_CPU "i386"
2543 #endif
2545 /* The svr4 ABI for the i386 says that records and unions are returned
2546    in memory.  */
2547 #ifndef DEFAULT_PCC_STRUCT_RETURN
2548 #define DEFAULT_PCC_STRUCT_RETURN 1
2549 #endif
2551 /* Whether -mtune= or -march= were specified */
2552 static int ix86_tune_defaulted;
2553 static int ix86_arch_specified;
2555 /* Vectorization library interface and handlers. */
2556 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2558 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2559 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2561 /* Processor target table, indexed by processor number.  */
2562 struct ptt
2563 {
2564 const struct processor_costs *cost; /* Processor costs */
2565 const int align_loop; /* Default alignments. */
2566 const int align_loop_max_skip;
2567 const int align_jump;
2568 const int align_jump_max_skip;
2569 const int align_func;
2572 static const struct ptt processor_target_table[PROCESSOR_max] =
2574 {&i386_cost, 4, 3, 4, 3, 4},
2575 {&i486_cost, 16, 15, 16, 15, 16},
2576 {&pentium_cost, 16, 7, 16, 7, 16},
2577 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2578 {&geode_cost, 0, 0, 0, 0, 0},
2579 {&k6_cost, 32, 7, 32, 7, 32},
2580 {&athlon_cost, 16, 7, 16, 7, 16},
2581 {&pentium4_cost, 0, 0, 0, 0, 0},
2582 {&k8_cost, 16, 7, 16, 7, 16},
2583 {&nocona_cost, 0, 0, 0, 0, 0},
2584 /* Core 2 32-bit. */
2585 {&generic32_cost, 16, 10, 16, 10, 16},
2586 /* Core 2 64-bit. */
2587 {&generic64_cost, 16, 10, 16, 10, 16},
2588 /* Core i7 32-bit. */
2589 {&generic32_cost, 16, 10, 16, 10, 16},
2590 /* Core i7 64-bit. */
2591 {&generic64_cost, 16, 10, 16, 10, 16},
2592 {&generic32_cost, 16, 7, 16, 7, 16},
2593 {&generic64_cost, 16, 10, 16, 10, 16},
2594 {&amdfam10_cost, 32, 24, 32, 7, 32},
2595 {&bdver1_cost, 32, 24, 32, 7, 32},
2596 {&bdver2_cost, 32, 24, 32, 7, 32},
2597 {&btver1_cost, 32, 24, 32, 7, 32},
2598 {&atom_cost, 16, 15, 16, 7, 16}
2601 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2631 /* Return true if a red-zone is in use. */
2634 ix86_using_red_zone (void)
2636 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
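/* For reference: in the SysV x86-64 ABI the red zone is the 128 bytes
   below the stack pointer that leaf code may use without adjusting
   %rsp; the MS ABI has no red zone, hence the check above.  */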
2639 /* Return a string that documents the current -m options. The caller is
2640 responsible for freeing the string. */
2643 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2644 const char *tune, enum fpmath_unit fpmath,
2647 struct ix86_target_opts
2649 const char *option; /* option string */
2650 HOST_WIDE_INT mask; /* isa mask options */
2653   /* This table is ordered so that options like -msse4.2, which imply
2654      preceding options, match first.  */
2655 static struct ix86_target_opts isa_opts[] =
2657 { "-m64", OPTION_MASK_ISA_64BIT },
2658 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2659 { "-mfma", OPTION_MASK_ISA_FMA },
2660 { "-mxop", OPTION_MASK_ISA_XOP },
2661 { "-mlwp", OPTION_MASK_ISA_LWP },
2662 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2663 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2664 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2665 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2666 { "-msse3", OPTION_MASK_ISA_SSE3 },
2667 { "-msse2", OPTION_MASK_ISA_SSE2 },
2668 { "-msse", OPTION_MASK_ISA_SSE },
2669 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2670 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2671 { "-mmmx", OPTION_MASK_ISA_MMX },
2672 { "-mabm", OPTION_MASK_ISA_ABM },
2673 { "-mbmi", OPTION_MASK_ISA_BMI },
2674 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2675 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2676 { "-mtbm", OPTION_MASK_ISA_TBM },
2677 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2678 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2679 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2680 { "-maes", OPTION_MASK_ISA_AES },
2681 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2682 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2683 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2684 { "-mf16c", OPTION_MASK_ISA_F16C },
2688 static struct ix86_target_opts flag_opts[] =
2690 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2691 { "-m80387", MASK_80387 },
2692 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2693 { "-malign-double", MASK_ALIGN_DOUBLE },
2694 { "-mcld", MASK_CLD },
2695 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2696 { "-mieee-fp", MASK_IEEE_FP },
2697 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2698 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2699 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2700 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2701 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2702 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2703 { "-mno-red-zone", MASK_NO_RED_ZONE },
2704 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2705 { "-mrecip", MASK_RECIP },
2706 { "-mrtd", MASK_RTD },
2707 { "-msseregparm", MASK_SSEREGPARM },
2708 { "-mstack-arg-probe", MASK_STACK_PROBE },
2709 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2710 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2711 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2712 { "-mvzeroupper", MASK_VZEROUPPER },
2713 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2714 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2715 { "-mprefer-avx128", MASK_PREFER_AVX128},
2718 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2721 char target_other[40];
2730 memset (opts, '\0', sizeof (opts));
2732 /* Add -march= option. */
2735 opts[num][0] = "-march=";
2736 opts[num++][1] = arch;
2739 /* Add -mtune= option. */
2742 opts[num][0] = "-mtune=";
2743 opts[num++][1] = tune;
2746   /* Pick out the ISA options.  */
2747 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2749 if ((isa & isa_opts[i].mask) != 0)
2751 opts[num++][0] = isa_opts[i].option;
2752 isa &= ~ isa_opts[i].mask;
2756 if (isa && add_nl_p)
2758 opts[num++][0] = isa_other;
2759 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2763 /* Add flag options. */
2764 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2766 if ((flags & flag_opts[i].mask) != 0)
2768 opts[num++][0] = flag_opts[i].option;
2769 flags &= ~ flag_opts[i].mask;
2773 if (flags && add_nl_p)
2775 opts[num++][0] = target_other;
2776 sprintf (target_other, "(other flags: %#x)", flags);
2779 /* Add -fpmath= option. */
2782 opts[num][0] = "-mfpmath=";
2783 switch ((int) fpmath)
2786 opts[num++][1] = "387";
2790 opts[num++][1] = "sse";
2793 case FPMATH_387 | FPMATH_SSE:
2794 opts[num++][1] = "sse+387";
2806 gcc_assert (num < ARRAY_SIZE (opts));
2808 /* Size the string. */
2810 sep_len = (add_nl_p) ? 3 : 1;
2811 for (i = 0; i < num; i++)
2814 for (j = 0; j < 2; j++)
2816 len += strlen (opts[i][j]);
2819 /* Build the string. */
2820 ret = ptr = (char *) xmalloc (len);
2823 for (i = 0; i < num; i++)
2827 for (j = 0; j < 2; j++)
2828 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2835 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2843 for (j = 0; j < 2; j++)
2846 memcpy (ptr, opts[i][j], len2[j]);
2848 line_len += len2[j];
2853 gcc_assert (ret + len >= ptr);
2858 /* Return true if profiling code should be emitted before the
2859    prologue, otherwise return false.
2860    Note: for x86, the "hotfix" case is rejected with sorry ().  */
2862 ix86_profile_before_prologue (void)
2864 return flag_fentry != 0;
2867 /* Function that is callable from the debugger to print the current
2868    options.  */
2870 ix86_debug_options (void)
2872 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2873 ix86_arch_string, ix86_tune_string,
2878 fprintf (stderr, "%s\n\n", opts);
2882 fputs ("<no options>\n\n", stderr);
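/* Usage note (editorial): this is meant to be invoked from a debugger
   session, e.g. "call ix86_debug_options ()" under gdb, to dump the
   -m options currently in effect.  */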
2887 /* Override various settings based on options. If MAIN_ARGS_P, the
2888    options are from the command line, otherwise they are from the
2889    target attribute.  */
2892 ix86_option_override_internal (bool main_args_p)
2895 unsigned int ix86_arch_mask, ix86_tune_mask;
2896 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2901 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2902 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2903 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2904 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2905 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2906 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2907 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2908 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2909 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2910 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2911 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2912 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2913 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2914 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2915 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2916 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2917 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2918 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2919 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2920 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2921 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2922 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2923 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2924 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2925 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2926 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2927 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2928 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2929 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2930 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2931 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2932 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2933 /* If this reaches 64, we need to widen the struct pta flags field below.  */
2937 const char *const name; /* processor name or nickname. */
2938 const enum processor_type processor;
2939 const enum attr_cpu schedule;
2940 const unsigned HOST_WIDE_INT flags;
2942 const processor_alias_table[] =
2944 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2945 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2946 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2947 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2948 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2949 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2950 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2951 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2952 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2953 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2954 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2955 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2956 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2958 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2960 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2961 PTA_MMX | PTA_SSE | PTA_SSE2},
2962 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2963        PTA_MMX | PTA_SSE | PTA_SSE2},
2964 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2965 PTA_MMX | PTA_SSE | PTA_SSE2},
2966 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2967 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2968 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2969 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2970 | PTA_CX16 | PTA_NO_SAHF},
2971 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2972 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2973 | PTA_SSSE3 | PTA_CX16},
2974 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2975 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2976 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2977 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2978 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2979 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2980 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2981 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
2982 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2983 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2984 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2985 | PTA_RDRND | PTA_F16C},
2986 {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
2987 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2988 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
2989 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2990 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
2991 | PTA_FMA | PTA_MOVBE},
2992 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2993 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2994 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2995 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2996        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2997 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2998 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2999 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3000 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3001 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3002 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3003 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3004 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3005 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3006 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3007 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3008 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3009 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3010 {"x86-64", PROCESSOR_K8, CPU_K8,
3011 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3012 {"k8", PROCESSOR_K8, CPU_K8,
3013 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3014 | PTA_SSE2 | PTA_NO_SAHF},
3015 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3016 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3017 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3018 {"opteron", PROCESSOR_K8, CPU_K8,
3019 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3020 | PTA_SSE2 | PTA_NO_SAHF},
3021 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3022 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3023 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3024 {"athlon64", PROCESSOR_K8, CPU_K8,
3025 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3026 | PTA_SSE2 | PTA_NO_SAHF},
3027 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3028 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3029 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3030 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3031 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3032 | PTA_SSE2 | PTA_NO_SAHF},
3033 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3034 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3035 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3036 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3037 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3038 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3039 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3040 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3041 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3042 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3043 | PTA_XOP | PTA_LWP},
3044 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3045 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3046 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3047 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3048 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3050 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3051 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3052        | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3053 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3054 0 /* flags are only used for -march switch. */ },
3055 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3056 PTA_64BIT /* flags are only used for -march switch. */ },
3059 /* -mrecip options. */
3062 const char *string; /* option name */
3063 unsigned int mask; /* mask bits to set */
3065 const recip_options[] =
3067 { "all", RECIP_MASK_ALL },
3068 { "none", RECIP_MASK_NONE },
3069 { "div", RECIP_MASK_DIV },
3070 { "sqrt", RECIP_MASK_SQRT },
3071 { "vec-div", RECIP_MASK_VEC_DIV },
3072 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3075 int const pta_size = ARRAY_SIZE (processor_alias_table);
3077 /* Set up prefix/suffix so the error messages refer to either the command
3078 line argument, or the attribute(target). */
3087 prefix = "option(\"";
3092 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3093 SUBTARGET_OVERRIDE_OPTIONS;
3096 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3097 SUBSUBTARGET_OVERRIDE_OPTIONS;
3101 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3103 /* -fPIC is the default for x86_64. */
3104 if (TARGET_MACHO && TARGET_64BIT)
3107 /* Need to check -mtune=generic first. */
3108 if (ix86_tune_string)
3110 if (!strcmp (ix86_tune_string, "generic")
3111 || !strcmp (ix86_tune_string, "i686")
3112 /* As special support for cross compilers we read -mtune=native
3113 as -mtune=generic. With native compilers we won't see the
3114 -mtune=native, as it was changed by the driver. */
3115 || !strcmp (ix86_tune_string, "native"))
3118 ix86_tune_string = "generic64";
3120 ix86_tune_string = "generic32";
3122 /* If this call is for setting the option attribute, allow the
3123 generic32/generic64 that was previously set. */
3124 else if (!main_args_p
3125 && (!strcmp (ix86_tune_string, "generic32")
3126 || !strcmp (ix86_tune_string, "generic64")))
3128 else if (!strncmp (ix86_tune_string, "generic", 7))
3129 error ("bad value (%s) for %stune=%s %s",
3130 ix86_tune_string, prefix, suffix, sw);
3131 else if (!strcmp (ix86_tune_string, "x86-64"))
3132 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3133 "%stune=k8%s or %stune=generic%s instead as appropriate",
3134 prefix, suffix, prefix, suffix, prefix, suffix);
3138 if (ix86_arch_string)
3139 ix86_tune_string = ix86_arch_string;
3140 if (!ix86_tune_string)
3142 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3143 ix86_tune_defaulted = 1;
3146 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3147 need to use a sensible tune option. */
3148 if (!strcmp (ix86_tune_string, "generic")
3149 || !strcmp (ix86_tune_string, "x86-64")
3150 || !strcmp (ix86_tune_string, "i686"))
3153 ix86_tune_string = "generic64";
3155 ix86_tune_string = "generic32";
3159 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3161 /* rep; movq isn't available in 32-bit code. */
3162 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3163 ix86_stringop_alg = no_stringop;
3166 if (!ix86_arch_string)
3167 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3169 ix86_arch_specified = 1;
3171 if (!global_options_set.x_ix86_abi)
3172 ix86_abi = DEFAULT_ABI;
3174 if (global_options_set.x_ix86_cmodel)
3176 switch (ix86_cmodel)
3181 ix86_cmodel = CM_SMALL_PIC;
3183 error ("code model %qs not supported in the %s bit mode",
3190 ix86_cmodel = CM_MEDIUM_PIC;
3192 error ("code model %qs not supported in the %s bit mode",
3194 else if (TARGET_X32)
3195 error ("code model %qs not supported in x32 mode",
3202 ix86_cmodel = CM_LARGE_PIC;
3204 error ("code model %qs not supported in the %s bit mode",
3206 else if (TARGET_X32)
3207 error ("code model %qs not supported in x32 mode",
3213 error ("code model %s does not support PIC mode", "32");
3215 error ("code model %qs not supported in the %s bit mode",
3222 error ("code model %s does not support PIC mode", "kernel");
3223 ix86_cmodel = CM_32;
3226 error ("code model %qs not supported in the %s bit mode",
3236 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3237 use of rip-relative addressing. This eliminates fixups that
3238 would otherwise be needed if this object is to be placed in a
3239 DLL, and is essentially just as efficient as direct addressing. */
3240 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3241 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3242 else if (TARGET_64BIT)
3243 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3245 ix86_cmodel = CM_32;
3247 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3249 error ("-masm=intel not supported in this configuration");
3250 ix86_asm_dialect = ASM_ATT;
3252 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3253 sorry ("%i-bit mode not compiled in",
3254 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3256 for (i = 0; i < pta_size; i++)
3257 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3259 ix86_schedule = processor_alias_table[i].schedule;
3260 ix86_arch = processor_alias_table[i].processor;
3261 /* Default cpu tuning to the architecture. */
3262 ix86_tune = ix86_arch;
3264 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3265 error ("CPU you selected does not support x86-64 "
3268 if (processor_alias_table[i].flags & PTA_MMX
3269 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3270 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3271 if (processor_alias_table[i].flags & PTA_3DNOW
3272 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3273 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3274 if (processor_alias_table[i].flags & PTA_3DNOW_A
3275 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3276 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3277 if (processor_alias_table[i].flags & PTA_SSE
3278 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3279 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3280 if (processor_alias_table[i].flags & PTA_SSE2
3281 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3282 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3283 if (processor_alias_table[i].flags & PTA_SSE3
3284 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3285 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3286 if (processor_alias_table[i].flags & PTA_SSSE3
3287 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3288 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3289 if (processor_alias_table[i].flags & PTA_SSE4_1
3290 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3291 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3292 if (processor_alias_table[i].flags & PTA_SSE4_2
3293 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3294 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3295 if (processor_alias_table[i].flags & PTA_AVX
3296 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3297 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3298 if (processor_alias_table[i].flags & PTA_AVX2
3299 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3300 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3301 if (processor_alias_table[i].flags & PTA_FMA
3302 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3303 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3304 if (processor_alias_table[i].flags & PTA_SSE4A
3305 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3306 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3307 if (processor_alias_table[i].flags & PTA_FMA4
3308 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3309 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3310 if (processor_alias_table[i].flags & PTA_XOP
3311 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3312 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3313 if (processor_alias_table[i].flags & PTA_LWP
3314 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3315 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3316 if (processor_alias_table[i].flags & PTA_ABM
3317 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3318 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3319 if (processor_alias_table[i].flags & PTA_BMI
3320 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3321 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3322 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3323 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3324 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3325 if (processor_alias_table[i].flags & PTA_TBM
3326 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3327 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3328 if (processor_alias_table[i].flags & PTA_BMI2
3329 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3330 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3331 if (processor_alias_table[i].flags & PTA_CX16
3332 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3333 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3334 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3335 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3336 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3337 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3338 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3339 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3340 if (processor_alias_table[i].flags & PTA_MOVBE
3341 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3342 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3343 if (processor_alias_table[i].flags & PTA_AES
3344 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3345 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3346 if (processor_alias_table[i].flags & PTA_PCLMUL
3347 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3348 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3349 if (processor_alias_table[i].flags & PTA_FSGSBASE
3350 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3351 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3352 if (processor_alias_table[i].flags & PTA_RDRND
3353 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3354 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3355 if (processor_alias_table[i].flags & PTA_F16C
3356 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3357 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3358 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3359 x86_prefetch_sse = true;
3364 if (!strcmp (ix86_arch_string, "generic"))
3365 error ("generic CPU can be used only for %stune=%s %s",
3366 prefix, suffix, sw);
3367 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3368 error ("bad value (%s) for %sarch=%s %s",
3369 ix86_arch_string, prefix, suffix, sw);
3371 ix86_arch_mask = 1u << ix86_arch;
3372 for (i = 0; i < X86_ARCH_LAST; ++i)
3373 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3375 for (i = 0; i < pta_size; i++)
3376 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3378 ix86_schedule = processor_alias_table[i].schedule;
3379 ix86_tune = processor_alias_table[i].processor;
3382 if (!(processor_alias_table[i].flags & PTA_64BIT))
3384 if (ix86_tune_defaulted)
3386 ix86_tune_string = "x86-64";
3387 for (i = 0; i < pta_size; i++)
3388 if (! strcmp (ix86_tune_string,
3389 processor_alias_table[i].name))
3391 ix86_schedule = processor_alias_table[i].schedule;
3392 ix86_tune = processor_alias_table[i].processor;
3395 error ("CPU you selected does not support x86-64 "
3401 /* Adjust tuning when compiling for 32-bit ABI. */
3404 case PROCESSOR_GENERIC64:
3405 ix86_tune = PROCESSOR_GENERIC32;
3406 ix86_schedule = CPU_PENTIUMPRO;
3409 case PROCESSOR_CORE2_64:
3410 ix86_tune = PROCESSOR_CORE2_32;
3413 case PROCESSOR_COREI7_64:
3414 ix86_tune = PROCESSOR_COREI7_32;
3421 /* Intel CPUs have always interpreted SSE prefetch instructions as
3422 NOPs; so, we can enable SSE prefetch instructions even when
3423 -mtune (rather than -march) points us to a processor that has them.
3424 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3425 higher processors. */
3427 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3428 x86_prefetch_sse = true;
3432 if (ix86_tune_specified && i == pta_size)
3433 error ("bad value (%s) for %stune=%s %s",
3434 ix86_tune_string, prefix, suffix, sw);
3436 ix86_tune_mask = 1u << ix86_tune;
3437 for (i = 0; i < X86_TUNE_LAST; ++i)
3438 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
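/* The arch and tune feature tables above are built with the same
   trick: each tunable stores a bitmask of the processors it applies
   to, and selecting one processor collapses the table into booleans by
   testing a single bit.  A hedged sketch with made-up processors and
   features: */
#if 0 /* Illustrative sketch only.  */
#include <stdio.h>

enum cpu { CPU_A, CPU_B, CPU_C, CPU_LAST };
enum feature { FEAT_X, FEAT_Y, FEAT_LAST };

/* For each feature, the set of CPUs that want it, as a bitmask.  */
static const unsigned int initial_features[FEAT_LAST] = {
  (1u << CPU_A) | (1u << CPU_C),   /* FEAT_X */
  (1u << CPU_B)                    /* FEAT_Y */
};

int
main (void)
{
  unsigned char features[FEAT_LAST];
  unsigned int mask = 1u << CPU_C;   /* the selected processor */
  int i;

  for (i = 0; i < FEAT_LAST; i++)
    features[i] = !!(initial_features[i] & mask);

  printf ("X=%d Y=%d\n", features[FEAT_X], features[FEAT_Y]); /* X=1 Y=0 */
  return 0;
}
#endif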
3440 #ifndef USE_IX86_FRAME_POINTER
3441 #define USE_IX86_FRAME_POINTER 0
3444 #ifndef USE_X86_64_FRAME_POINTER
3445 #define USE_X86_64_FRAME_POINTER 0
3448 /* Set the default values for switches whose default depends on TARGET_64BIT
3449 in case they weren't overwritten by command line options. */
3452 if (optimize > 1 && !global_options_set.x_flag_zee)
3454 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3455 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3456 if (flag_asynchronous_unwind_tables == 2)
3457 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3458 if (flag_pcc_struct_return == 2)
3459 flag_pcc_struct_return = 0;
3463 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3464 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3465 if (flag_asynchronous_unwind_tables == 2)
3466 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3467 if (flag_pcc_struct_return == 2)
3468 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3472 ix86_cost = &ix86_size_cost;
3474 ix86_cost = processor_target_table[ix86_tune].cost;
3476 /* Arrange to set up i386_stack_locals for all functions. */
3477 init_machine_status = ix86_init_machine_status;
3479 /* Validate -mregparm= value. */
3480 if (global_options_set.x_ix86_regparm)
3483 warning (0, "-mregparm is ignored in 64-bit mode");
3484 if (ix86_regparm > REGPARM_MAX)
3486 error ("-mregparm=%d is not between 0 and %d",
3487 ix86_regparm, REGPARM_MAX);
3492 ix86_regparm = REGPARM_MAX;
3494 /* Default align_* from the processor table. */
3495 if (align_loops == 0)
3497 align_loops = processor_target_table[ix86_tune].align_loop;
3498 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3500 if (align_jumps == 0)
3502 align_jumps = processor_target_table[ix86_tune].align_jump;
3503 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3505 if (align_functions == 0)
3507 align_functions = processor_target_table[ix86_tune].align_func;
3510 /* Provide default for -mbranch-cost= value. */
3511 if (!global_options_set.x_ix86_branch_cost)
3512 ix86_branch_cost = ix86_cost->branch_cost;
3516 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3518 /* Enable by default the SSE and MMX builtins. Do allow the user to
3519 explicitly disable any of these. In particular, disabling SSE and
3520 MMX for kernel code is extremely useful. */
3521 if (!ix86_arch_specified)
3523 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3524 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3527 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3531 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3533 if (!ix86_arch_specified)
3535 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3537 /* i386 ABI does not specify red zone. It still makes sense to use it
3538 when the programmer takes care to keep the stack from being destroyed. */
3539 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3540 target_flags |= MASK_NO_RED_ZONE;
3543 /* Keep nonleaf frame pointers. */
3544 if (flag_omit_frame_pointer)
3545 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3546 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3547 flag_omit_frame_pointer = 1;
3549 /* If we're doing fast math, we don't care about comparison order
3550 wrt NaNs. This lets us use a shorter comparison sequence. */
3551 if (flag_finite_math_only)
3552 target_flags &= ~MASK_IEEE_FP;
3554 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3555 since the insns won't need emulation. */
3556 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3557 target_flags &= ~MASK_NO_FANCY_MATH_387;
3559 /* Likewise, if the target doesn't have a 387, or we've specified
3560 software floating point, don't use 387 inline intrinsics. */
3562 target_flags |= MASK_NO_FANCY_MATH_387;
3564 /* Turn on MMX builtins for -msse. */
3567 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3568 x86_prefetch_sse = true;
3571 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3572 if (TARGET_SSE4_2 || TARGET_ABM)
3573 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3575 /* Turn on lzcnt instruction for -mabm. */
3577 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3579 /* Validate -mpreferred-stack-boundary= value or default it to
3580 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3581 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3582 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3584 int min = (TARGET_64BIT ? 4 : 2);
3585 int max = (TARGET_SEH ? 4 : 12);
3587 if (ix86_preferred_stack_boundary_arg < min
3588 || ix86_preferred_stack_boundary_arg > max)
3591 error ("-mpreferred-stack-boundary is not supported "
3594 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3595 ix86_preferred_stack_boundary_arg, min, max);
3598 ix86_preferred_stack_boundary
3599 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
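/* The option value is log2 of the byte alignment, so the boundary in
   bits is (1 << arg) * BITS_PER_UNIT.  A self-contained sketch of the
   validation and conversion; the limits shown are the ones used above
   for 64-bit non-SEH targets, everything else is invented for the
   example.  */
#if 0 /* Illustrative sketch only.  */
#include <stdio.h>

#define BITS_PER_UNIT 8

/* Convert a log2(bytes) option argument to an alignment in bits, or
   return -1 if the argument is out of the accepted range.  */
static int
boundary_bits_from_arg (int arg, int min, int max)
{
  if (arg < min || arg > max)
    return -1;
  return (1 << arg) * BITS_PER_UNIT;
}

int
main (void)
{
  printf ("%d\n", boundary_bits_from_arg (4, 4, 12));  /* 2**4 bytes = 128 bits */
  printf ("%d\n", boundary_bits_from_arg (2, 4, 12));  /* -1: rejected */
  return 0;
}
#endif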
3602 /* Set the default value for -mstackrealign. */
3603 if (ix86_force_align_arg_pointer == -1)
3604 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3606 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3608 /* Validate -mincoming-stack-boundary= value or default it to
3609 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3610 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3611 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3613 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3614 || ix86_incoming_stack_boundary_arg > 12)
3615 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3616 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3619 ix86_user_incoming_stack_boundary
3620 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3621 ix86_incoming_stack_boundary
3622 = ix86_user_incoming_stack_boundary;
3626 /* Accept -msseregparm only if at least SSE support is enabled. */
3627 if (TARGET_SSEREGPARM
3629 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3631 if (global_options_set.x_ix86_fpmath)
3633 if (ix86_fpmath & FPMATH_SSE)
3637 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3638 ix86_fpmath = FPMATH_387;
3640 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3642 warning (0, "387 instruction set disabled, using SSE arithmetic");
3643 ix86_fpmath = FPMATH_SSE;
3648 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3650 /* If the i387 is disabled, then do not return values in it. */
3652 target_flags &= ~MASK_FLOAT_RETURNS;
3654 /* Use an external vectorized library for vectorizing intrinsics. */
3655 if (global_options_set.x_ix86_veclibabi_type)
3656 switch (ix86_veclibabi_type)
3658 case ix86_veclibabi_type_svml:
3659 ix86_veclib_handler = ix86_veclibabi_svml;
3662 case ix86_veclibabi_type_acml:
3663 ix86_veclib_handler = ix86_veclibabi_acml;
3670 if ((!USE_IX86_FRAME_POINTER
3671 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3672 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3674 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3676 /* ??? Unwind info is not correct around the CFG unless either a frame
3677 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3678 unwind info generation to be aware of the CFG and propagating states around edges. */
3680 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3681 || flag_exceptions || flag_non_call_exceptions)
3682 && flag_omit_frame_pointer
3683 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3685 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3686 warning (0, "unwind tables currently require either a frame pointer "
3687 "or %saccumulate-outgoing-args%s for correctness",
3689 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3692 /* If stack probes are required, the space used for large function
3693 arguments on the stack must also be probed, so enable
3694 -maccumulate-outgoing-args so this happens in the prologue. */
3695 if (TARGET_STACK_PROBE
3696 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3698 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3699 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3700 "for correctness", prefix, suffix);
3701 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3704 /* For sane SSE instruction set generation we need the fcomi instruction.
3705 It is safe to enable all CMOVE instructions. Also, RDRAND intrinsic
3706 expands to a sequence that includes conditional move. */
3707 if (TARGET_SSE || TARGET_RDRND)
3710 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3713 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3714 p = strchr (internal_label_prefix, 'X');
3715 internal_label_prefix_len = p - internal_label_prefix;
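/* The prefix length is recovered by generating a label for the known
   name "LX" and measuring how far in the 'X' lands.  The same
   technique in isolation, using the elfos.h-style definition of
   ASM_GENERATE_INTERNAL_LABEL as a stand-in (other targets format the
   label differently): */
#if 0 /* Illustrative sketch only.  */
#include <stdio.h>
#include <string.h>

int
main (void)
{
  char label[32];
  char *p;

  /* Stand-in for ASM_GENERATE_INTERNAL_LABEL (label, "LX", 0).  */
  sprintf (label, "*.%s%u", "LX", 0u);   /* "*.LX0" */

  p = strchr (label, 'X');
  printf ("prefix = \"%.*s\"\n", (int) (p - label), label);   /* "*.L" */
  return 0;
}
#endif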
3719 /* When the scheduling description is not available, disable the scheduler
3720 pass so it won't slow down compilation and make x87 code slower. */
3721 if (!TARGET_SCHEDULE)
3722 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3724 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3725 ix86_cost->simultaneous_prefetches,
3726 global_options.x_param_values,
3727 global_options_set.x_param_values);
3728 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3729 global_options.x_param_values,
3730 global_options_set.x_param_values);
3731 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3732 global_options.x_param_values,
3733 global_options_set.x_param_values);
3734 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3735 global_options.x_param_values,
3736 global_options_set.x_param_values);
3738 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3739 if (flag_prefetch_loop_arrays < 0
3742 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3743 flag_prefetch_loop_arrays = 1;
3745 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3746 can be optimized to ap = __builtin_next_arg (0). */
3747 if (!TARGET_64BIT && !flag_split_stack)
3748 targetm.expand_builtin_va_start = NULL;
3752 ix86_gen_leave = gen_leave_rex64;
3753 ix86_gen_add3 = gen_adddi3;
3754 ix86_gen_sub3 = gen_subdi3;
3755 ix86_gen_sub3_carry = gen_subdi3_carry;
3756 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3757 ix86_gen_monitor = gen_sse3_monitor64;
3758 ix86_gen_andsp = gen_anddi3;
3759 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3760 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3761 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3765 ix86_gen_leave = gen_leave;
3766 ix86_gen_add3 = gen_addsi3;
3767 ix86_gen_sub3 = gen_subsi3;
3768 ix86_gen_sub3_carry = gen_subsi3_carry;
3769 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3770 ix86_gen_monitor = gen_sse3_monitor;
3771 ix86_gen_andsp = gen_andsi3;
3772 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3773 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3774 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
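/* Instead of testing TARGET_64BIT at every emission site, the port
   binds word-size-specific generator functions once, here, through
   function pointers.  The shape of that dispatch reduced to a toy
   (names invented): */
#if 0 /* Illustrative sketch only.  */
#include <stdio.h>

static void gen_add_si (void) { puts ("addl"); }   /* 32-bit flavor */
static void gen_add_di (void) { puts ("addq"); }   /* 64-bit flavor */

/* Bound once at option-override time, used everywhere afterwards.  */
static void (*gen_add) (void);

int
main (void)
{
  int target_64bit = 1;   /* stand-in for TARGET_64BIT */

  gen_add = target_64bit ? gen_add_di : gen_add_si;
  gen_add ();             /* prints "addq" */
  return 0;
}
#endif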
3778 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3780 target_flags |= MASK_CLD & ~target_flags_explicit;
3783 if (!TARGET_64BIT && flag_pic)
3785 if (flag_fentry > 0)
3786 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3790 else if (TARGET_SEH)
3792 if (flag_fentry == 0)
3793 sorry ("-mno-fentry isn%'t compatible with SEH");
3796 else if (flag_fentry < 0)
3798 #if defined(PROFILE_BEFORE_PROLOGUE)
3807 /* When not optimizing for size, enable the vzeroupper optimization for
3808 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3809 AVX unaligned load/store. */
3812 if (flag_expensive_optimizations
3813 && !(target_flags_explicit & MASK_VZEROUPPER))
3814 target_flags |= MASK_VZEROUPPER;
3815 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3816 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3817 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3818 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3819 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3820 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3821 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3822 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3823 target_flags |= MASK_PREFER_AVX128;
3828 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3829 target_flags &= ~MASK_VZEROUPPER;
3832 if (ix86_recip_name)
3834 char *p = ASTRDUP (ix86_recip_name);
3836 unsigned int mask, i;
3839 while ((q = strtok (p, ",")) != NULL)
3850 if (!strcmp (q, "default"))
3851 mask = RECIP_MASK_ALL;
3854 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
3855 if (!strcmp (q, recip_options[i].string))
3857 mask = recip_options[i].mask;
3861 if (i == ARRAY_SIZE (recip_options))
3863 error ("unknown option for -mrecip=%s", q);
3865 mask = RECIP_MASK_NONE;
3869 recip_mask_explicit |= mask;
3871 recip_mask &= ~mask;
3878 recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
3879 else if (target_flags_explicit & MASK_RECIP)
3880 recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
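/* -mrecip= is a comma-separated list tokenized with strtok over a
   writable copy (ASTRDUP above); each token maps to a bit in a mask,
   and "default" selects everything.  A self-contained sketch; the
   option table and masks are invented for the example.  */
#if 0 /* Illustrative sketch only.  */
#include <stdio.h>
#include <string.h>

#define RECIP_DIV  0x1
#define RECIP_SQRT 0x2
#define RECIP_ALL  (RECIP_DIV | RECIP_SQRT)

static const struct { const char *string; unsigned int mask; } options[] = {
  { "div",  RECIP_DIV  },
  { "sqrt", RECIP_SQRT },
};

int
main (void)
{
  char buf[] = "div,sqrt";   /* writable copy of the option argument */
  unsigned int mask = 0;
  size_t i;
  char *q;

  for (q = strtok (buf, ","); q != NULL; q = strtok (NULL, ","))
    {
      if (!strcmp (q, "default"))
        {
          mask |= RECIP_ALL;
          continue;
        }
      for (i = 0; i < sizeof (options) / sizeof (options[0]); i++)
        if (!strcmp (q, options[i].string))
          {
            mask |= options[i].mask;
            break;
          }
      if (i == sizeof (options) / sizeof (options[0]))
        fprintf (stderr, "unknown option: %s\n", q);
    }
  printf ("mask = %x\n", mask);   /* 3 */
  return 0;
}
#endif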
3882 /* Save the initial options in case the user uses function-specific options. */
3885 target_option_default_node = target_option_current_node
3886 = build_target_option_node ();
3889 /* Return TRUE if VAL is passed in a register in a 256bit AVX mode. */
3892 function_pass_avx256_p (const_rtx val)
3897 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3900 if (GET_CODE (val) == PARALLEL)
3905 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3907 r = XVECEXP (val, 0, i);
3908 if (GET_CODE (r) == EXPR_LIST
3910 && REG_P (XEXP (r, 0))
3911 && (GET_MODE (XEXP (r, 0)) == OImode
3912 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3920 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3923 ix86_option_override (void)
3925 ix86_option_override_internal (true);
3928 /* Update register usage after having seen the compiler flags. */
3931 ix86_conditional_register_usage (void)
3936 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3938 if (fixed_regs[i] > 1)
3939 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3940 if (call_used_regs[i] > 1)
3941 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3944 /* The PIC register, if it exists, is fixed. */
3945 j = PIC_OFFSET_TABLE_REGNUM;
3946 if (j != INVALID_REGNUM)
3947 fixed_regs[j] = call_used_regs[j] = 1;
3949 /* The 64-bit MS_ABI changes the set of call-used registers. */
3950 if (TARGET_64BIT_MS_ABI)
3952 call_used_regs[SI_REG] = 0;
3953 call_used_regs[DI_REG] = 0;
3954 call_used_regs[XMM6_REG] = 0;
3955 call_used_regs[XMM7_REG] = 0;
3956 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3957 call_used_regs[i] = 0;
3960 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3961 other call-clobbered regs for 64-bit. */
3964 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3966 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3967 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3968 && call_used_regs[i])
3969 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3972 /* If MMX is disabled, squash the registers. */
3974 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3975 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3976 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3978 /* If SSE is disabled, squash the registers. */
3980 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3981 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3982 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3984 /* If the FPU is disabled, squash the registers. */
3985 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3986 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3987 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3988 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3990 /* If 32-bit, squash the 64-bit registers. */
3993 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3995 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4001 /* Save the current options. */
4004 ix86_function_specific_save (struct cl_target_option *ptr)
4006 ptr->arch = ix86_arch;
4007 ptr->schedule = ix86_schedule;
4008 ptr->tune = ix86_tune;
4009 ptr->branch_cost = ix86_branch_cost;
4010 ptr->tune_defaulted = ix86_tune_defaulted;
4011 ptr->arch_specified = ix86_arch_specified;
4012 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4013 ptr->ix86_target_flags_explicit = target_flags_explicit;
4014 ptr->x_recip_mask_explicit = recip_mask_explicit;
4016 /* The fields are char but the variables are not; make sure the
4017 values fit in the fields. */
4018 gcc_assert (ptr->arch == ix86_arch);
4019 gcc_assert (ptr->schedule == ix86_schedule);
4020 gcc_assert (ptr->tune == ix86_tune);
4021 gcc_assert (ptr->branch_cost == ix86_branch_cost);
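/* The save structure keeps enums in narrow (char-sized) fields to stay
   small, and the asserts above catch any enumerator that no longer
   round-trips through the field.  The pattern in miniature, with
   invented names: */
#if 0 /* Illustrative sketch only.  */
#include <assert.h>

enum widget { WIDGET_A = 0, WIDGET_B = 200 };

struct saved_options { unsigned char widget; };

static void
save_widget (struct saved_options *s, enum widget w)
{
  s->widget = (unsigned char) w;
  /* If the enum ever outgrows the field, fail loudly at save time
     instead of silently restoring a truncated value later.  */
  assert ((enum widget) s->widget == w);
}

int
main (void)
{
  struct saved_options s;
  save_widget (&s, WIDGET_B);   /* 200 still fits; e.g. 300 would trip */
  return 0;
}
#endif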
4024 /* Restore the current options. */
4027 ix86_function_specific_restore (struct cl_target_option *ptr)
4029 enum processor_type old_tune = ix86_tune;
4030 enum processor_type old_arch = ix86_arch;
4031 unsigned int ix86_arch_mask, ix86_tune_mask;
4034 ix86_arch = (enum processor_type) ptr->arch;
4035 ix86_schedule = (enum attr_cpu) ptr->schedule;
4036 ix86_tune = (enum processor_type) ptr->tune;
4037 ix86_branch_cost = ptr->branch_cost;
4038 ix86_tune_defaulted = ptr->tune_defaulted;
4039 ix86_arch_specified = ptr->arch_specified;
4040 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4041 target_flags_explicit = ptr->ix86_target_flags_explicit;
4042 recip_mask_explicit = ptr->x_recip_mask_explicit;
4044 /* Recreate the arch feature tests if the arch changed. */
4045 if (old_arch != ix86_arch)
4047 ix86_arch_mask = 1u << ix86_arch;
4048 for (i = 0; i < X86_ARCH_LAST; ++i)
4049 ix86_arch_features[i]
4050 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4053 /* Recreate the tune optimization tests. */
4054 if (old_tune != ix86_tune)
4056 ix86_tune_mask = 1u << ix86_tune;
4057 for (i = 0; i < X86_TUNE_LAST; ++i)
4058 ix86_tune_features[i]
4059 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4063 /* Print the current options. */
4066 ix86_function_specific_print (FILE *file, int indent,
4067 struct cl_target_option *ptr)
4070 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4071 NULL, NULL, ptr->x_ix86_fpmath, false);
4073 fprintf (file, "%*sarch = %d (%s)\n",
4076 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4077 ? cpu_names[ptr->arch]
4080 fprintf (file, "%*stune = %d (%s)\n",
4083 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4084 ? cpu_names[ptr->tune]
4087 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4091 fprintf (file, "%*s%s\n", indent, "", target_string);
4092 free (target_string);
4097 /* Inner function to process the attribute((target(...))), take an argument and
4098 set the current options from the argument. If we have a list, recursively go over the list. */
4102 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4103 struct gcc_options *enum_opts_set)
4108 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4109 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4110 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4111 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4112 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4128 enum ix86_opt_type type;
4133 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4134 IX86_ATTR_ISA ("abm", OPT_mabm),
4135 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4136 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4137 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4138 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4139 IX86_ATTR_ISA ("aes", OPT_maes),
4140 IX86_ATTR_ISA ("avx", OPT_mavx),
4141 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4142 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4143 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4144 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4145 IX86_ATTR_ISA ("sse", OPT_msse),
4146 IX86_ATTR_ISA ("sse2", OPT_msse2),
4147 IX86_ATTR_ISA ("sse3", OPT_msse3),
4148 IX86_ATTR_ISA ("sse4", OPT_msse4),
4149 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4150 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4151 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4152 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4153 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4154 IX86_ATTR_ISA ("fma", OPT_mfma),
4155 IX86_ATTR_ISA ("xop", OPT_mxop),
4156 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4157 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4158 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4159 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4162 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4164 /* string options */
4165 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4166 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4169 IX86_ATTR_YES ("cld",
4173 IX86_ATTR_NO ("fancy-math-387",
4174 OPT_mfancy_math_387,
4175 MASK_NO_FANCY_MATH_387),
4177 IX86_ATTR_YES ("ieee-fp",
4181 IX86_ATTR_YES ("inline-all-stringops",
4182 OPT_minline_all_stringops,
4183 MASK_INLINE_ALL_STRINGOPS),
4185 IX86_ATTR_YES ("inline-stringops-dynamically",
4186 OPT_minline_stringops_dynamically,
4187 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4189 IX86_ATTR_NO ("align-stringops",
4190 OPT_mno_align_stringops,
4191 MASK_NO_ALIGN_STRINGOPS),
4193 IX86_ATTR_YES ("recip",
4199 /* If this is a list, recurse to get the options. */
4200 if (TREE_CODE (args) == TREE_LIST)
4204 for (; args; args = TREE_CHAIN (args))
4205 if (TREE_VALUE (args)
4206 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4207 p_strings, enum_opts_set))
4213 else if (TREE_CODE (args) != STRING_CST)
4216 /* Handle multiple arguments separated by commas. */
4217 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4219 while (next_optstr && *next_optstr != '\0')
4221 char *p = next_optstr;
4223 char *comma = strchr (next_optstr, ',');
4224 const char *opt_string;
4225 size_t len, opt_len;
4230 enum ix86_opt_type type = ix86_opt_unknown;
4236 len = comma - next_optstr;
4237 next_optstr = comma + 1;
4245 /* Recognize no-xxx. */
4246 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4255 /* Find the option. */
4258 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4260 type = attrs[i].type;
4261 opt_len = attrs[i].len;
4262 if (ch == attrs[i].string[0]
4263 && ((type != ix86_opt_str && type != ix86_opt_enum)
4266 && memcmp (p, attrs[i].string, opt_len) == 0)
4269 mask = attrs[i].mask;
4270 opt_string = attrs[i].string;
4275 /* Process the option. */
4278 error ("attribute(target(\"%s\")) is unknown", orig_p);
4282 else if (type == ix86_opt_isa)
4284 struct cl_decoded_option decoded;
4286 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4287 ix86_handle_option (&global_options, &global_options_set,
4288 &decoded, input_location);
4291 else if (type == ix86_opt_yes || type == ix86_opt_no)
4293 if (type == ix86_opt_no)
4294 opt_set_p = !opt_set_p;
4297 target_flags |= mask;
4299 target_flags &= ~mask;
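#if 0 /* Illustrative sketch only.  All boolean attribute options
   funnel through one code path: strip an optional "no-" prefix to get
   the polarity, then set or clear the option's mask bit.  The idiom as
   a standalone function (names invented, table lookup elided).  */
#include <stdio.h>
#include <string.h>

/* Apply option string P (e.g. "cld" or "no-cld") carrying bit MASK
   to *FLAGS.  */
static void
apply_bool_option (unsigned int *flags, const char *p, unsigned int mask)
{
  int opt_set_p = 1;

  if (strncmp (p, "no-", 3) == 0)
    {
      opt_set_p = 0;
      p += 3;
    }
  /* ...the table lookup on the remaining string would happen here...  */
  if (opt_set_p)
    *flags |= mask;
  else
    *flags &= ~mask;
}

int
main (void)
{
  unsigned int flags = 0;

  apply_bool_option (&flags, "cld", 0x1);
  apply_bool_option (&flags, "no-cld", 0x1);
  printf ("%x\n", flags);   /* 0: set, then cleared again */
  return 0;
}
#endif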
4302 else if (type == ix86_opt_str)
4306 error ("option(\"%s\") was already specified", opt_string);
4310 p_strings[opt] = xstrdup (p + opt_len);
4313 else if (type == ix86_opt_enum)
4318 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4320 set_option (&global_options, enum_opts_set, opt, value,
4321 p + opt_len, DK_UNSPECIFIED, input_location,
4325 error ("attribute(target(\"%s\")) is unknown", orig_p);
4337 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4340 ix86_valid_target_attribute_tree (tree args)
4342 const char *orig_arch_string = ix86_arch_string;
4343 const char *orig_tune_string = ix86_tune_string;
4344 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4345 int orig_tune_defaulted = ix86_tune_defaulted;
4346 int orig_arch_specified = ix86_arch_specified;
4347 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4350 struct cl_target_option *def
4351 = TREE_TARGET_OPTION (target_option_default_node);
4352 struct gcc_options enum_opts_set;
4354 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4356 /* Process each of the options on the chain. */
4357 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4361 /* If the changed options are different from the default, rerun
4362 ix86_option_override_internal, and then save the options away.
4363 The string options are attribute options, and will be undone
4364 when we copy the save structure. */
4365 if (ix86_isa_flags != def->x_ix86_isa_flags
4366 || target_flags != def->x_target_flags
4367 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4368 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4369 || enum_opts_set.x_ix86_fpmath)
4371 /* If we are using the default tune= or arch=, undo the string assigned,
4372 and use the default. */
4373 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4374 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4375 else if (!orig_arch_specified)
4376 ix86_arch_string = NULL;
4378 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4379 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4380 else if (orig_tune_defaulted)
4381 ix86_tune_string = NULL;
4383 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4384 if (enum_opts_set.x_ix86_fpmath)
4385 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4386 else if (!TARGET_64BIT && TARGET_SSE)
4388 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4389 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4392 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4393 ix86_option_override_internal (false);
4395 /* Add any builtin functions with the new isa if any. */
4396 ix86_add_new_builtins (ix86_isa_flags);
4398 /* Save the current options unless we are validating options for #pragma. */
4400 t = build_target_option_node ();
4402 ix86_arch_string = orig_arch_string;
4403 ix86_tune_string = orig_tune_string;
4404 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4406 /* Free up memory allocated to hold the strings */
4407 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4408 free (option_strings[i]);
4414 /* Hook to validate attribute((target("string"))). */
4417 ix86_valid_target_attribute_p (tree fndecl,
4418 tree ARG_UNUSED (name),
4420 int ARG_UNUSED (flags))
4422 struct cl_target_option cur_target;
4424 tree old_optimize = build_optimization_node ();
4425 tree new_target, new_optimize;
4426 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4428 /* If the function changed the optimization levels as well as setting target
4429 options, start with the optimizations specified. */
4430 if (func_optimize && func_optimize != old_optimize)
4431 cl_optimization_restore (&global_options,
4432 TREE_OPTIMIZATION (func_optimize));
4434 /* The target attributes may also change some optimization flags, so update
4435 the optimization options if necessary. */
4436 cl_target_option_save (&cur_target, &global_options);
4437 new_target = ix86_valid_target_attribute_tree (args);
4438 new_optimize = build_optimization_node ();
4445 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4447 if (old_optimize != new_optimize)
4448 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4451 cl_target_option_restore (&global_options, &cur_target);
4453 if (old_optimize != new_optimize)
4454 cl_optimization_restore (&global_options,
4455 TREE_OPTIMIZATION (old_optimize));
4461 /* Hook to determine if one function can safely inline another. */
4464 ix86_can_inline_p (tree caller, tree callee)
4467 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4468 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4470 /* If callee has no option attributes, then it is ok to inline. */
4474 /* If the caller has no option attributes but the callee does, then it is not ok to
4476 else if (!caller_tree)
4481 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4482 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4484 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4485 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4487 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4488 != callee_opts->x_ix86_isa_flags)
4491 /* See if we have the same non-isa options. */
4492 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4495 /* See if arch, tune, etc. are the same. */
4496 else if (caller_opts->arch != callee_opts->arch)
4499 else if (caller_opts->tune != callee_opts->tune)
4502 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4505 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4516 /* Remember the last target of ix86_set_current_function. */
4517 static GTY(()) tree ix86_previous_fndecl;
4519 /* Establish appropriate back-end context for processing the function
4520 FNDECL. The argument might be NULL to indicate processing at top
4521 level, outside of any function scope. */
4523 ix86_set_current_function (tree fndecl)
4525 /* Only change the context if the function changes. This hook is called
4526 several times in the course of compiling a function, and we don't want to
4527 slow things down too much or call target_reinit when it isn't safe. */
4528 if (fndecl && fndecl != ix86_previous_fndecl)
4530 tree old_tree = (ix86_previous_fndecl
4531 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4534 tree new_tree = (fndecl
4535 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4538 ix86_previous_fndecl = fndecl;
4539 if (old_tree == new_tree)
4544 cl_target_option_restore (&global_options,
4545 TREE_TARGET_OPTION (new_tree));
4551 struct cl_target_option *def
4552 = TREE_TARGET_OPTION (target_option_current_node);
4554 cl_target_option_restore (&global_options, def);
4561 /* Return true if this goes in large data/bss. */
4564 ix86_in_large_data_p (tree exp)
4566 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4569 /* Functions are never large data. */
4570 if (TREE_CODE (exp) == FUNCTION_DECL)
4573 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4575 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4576 if (strcmp (section, ".ldata") == 0
4577 || strcmp (section, ".lbss") == 0)
4583 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4585 /* If this is an incomplete type with size 0, then we can't put it
4586 in data because it might be too big when completed. */
4587 if (!size || size > ix86_section_threshold)
4594 /* Switch to the appropriate section for output of DECL.
4595 DECL is either a `VAR_DECL' node or a constant of some sort.
4596 RELOC indicates whether forming the initial value of DECL requires
4597 link-time relocations. */
4599 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4603 x86_64_elf_select_section (tree decl, int reloc,
4604 unsigned HOST_WIDE_INT align)
4606 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4607 && ix86_in_large_data_p (decl))
4609 const char *sname = NULL;
4610 unsigned int flags = SECTION_WRITE;
4611 switch (categorize_decl_for_section (decl, reloc))
4616 case SECCAT_DATA_REL:
4617 sname = ".ldata.rel";
4619 case SECCAT_DATA_REL_LOCAL:
4620 sname = ".ldata.rel.local";
4622 case SECCAT_DATA_REL_RO:
4623 sname = ".ldata.rel.ro";
4625 case SECCAT_DATA_REL_RO_LOCAL:
4626 sname = ".ldata.rel.ro.local";
4630 flags |= SECTION_BSS;
4633 case SECCAT_RODATA_MERGE_STR:
4634 case SECCAT_RODATA_MERGE_STR_INIT:
4635 case SECCAT_RODATA_MERGE_CONST:
4639 case SECCAT_SRODATA:
4646 /* We don't split these for the medium model. Place them into
4647 default sections and hope for the best. */
4652 /* We might get called with string constants, but get_named_section
4653 doesn't like them as they are not DECLs. Also, we need to set
4654 flags in that case. */
4656 return get_section (sname, flags, NULL);
4657 return get_named_section (decl, sname, reloc);
4660 return default_elf_select_section (decl, reloc, align);
4663 /* Build up a unique section name, expressed as a
4664 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4665 RELOC indicates whether the initial value of EXP requires
4666 link-time relocations. */
4668 static void ATTRIBUTE_UNUSED
4669 x86_64_elf_unique_section (tree decl, int reloc)
4671 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4672 && ix86_in_large_data_p (decl))
4674 const char *prefix = NULL;
4675 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4676 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4678 switch (categorize_decl_for_section (decl, reloc))
4681 case SECCAT_DATA_REL:
4682 case SECCAT_DATA_REL_LOCAL:
4683 case SECCAT_DATA_REL_RO:
4684 case SECCAT_DATA_REL_RO_LOCAL:
4685 prefix = one_only ? ".ld" : ".ldata";
4688 prefix = one_only ? ".lb" : ".lbss";
4691 case SECCAT_RODATA_MERGE_STR:
4692 case SECCAT_RODATA_MERGE_STR_INIT:
4693 case SECCAT_RODATA_MERGE_CONST:
4694 prefix = one_only ? ".lr" : ".lrodata";
4696 case SECCAT_SRODATA:
4703 /* We don't split these for the medium model. Place them into
4704 default sections and hope for the best. */
4709 const char *name, *linkonce;
4712 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4713 name = targetm.strip_name_encoding (name);
4715 /* If we're using one_only, then there needs to be a .gnu.linkonce
4716 prefix to the section name. */
4717 linkonce = one_only ? ".gnu.linkonce" : "";
4719 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4721 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4725 default_unique_section (decl, reloc);
4728 #ifdef COMMON_ASM_OP
4729 /* This says how to output assembler code to declare an
4730 uninitialized external linkage data object.
4732 For medium model x86-64 we need to use the .largecomm directive for large objects. */
4735 x86_elf_aligned_common (FILE *file,
4736 const char *name, unsigned HOST_WIDE_INT size,
4739 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4740 && size > (unsigned int)ix86_section_threshold)
4741 fputs (".largecomm\t", file);
4743 fputs (COMMON_ASM_OP, file);
4744 assemble_name (file, name);
4745 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4746 size, align / BITS_PER_UNIT);
4750 /* Utility function for targets to use in implementing
4751 ASM_OUTPUT_ALIGNED_BSS. */
4754 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4755 const char *name, unsigned HOST_WIDE_INT size,
4758 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4759 && size > (unsigned int)ix86_section_threshold)
4760 switch_to_section (get_named_section (decl, ".lbss", 0));
4762 switch_to_section (bss_section);
4763 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4764 #ifdef ASM_DECLARE_OBJECT_NAME
4765 last_assemble_variable_decl = decl;
4766 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4768 /* The standard thing is just to output a label for the object. */
4769 ASM_OUTPUT_LABEL (file, name);
4770 #endif /* ASM_DECLARE_OBJECT_NAME */
4771 ASM_OUTPUT_SKIP (file, size ? size : 1);
4774 /* Decide whether we must probe the stack before any space allocation
4775 on this target. It's essentially TARGET_STACK_PROBE except when
4776 -fstack-check causes the stack to be already probed differently. */
4779 ix86_target_stack_probe (void)
4781 /* Do not probe the stack twice if static stack checking is enabled. */
4782 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4785 return TARGET_STACK_PROBE;
4788 /* Decide whether we can make a sibling call to a function. DECL is the
4789 declaration of the function being targeted by the call and EXP is the
4790 CALL_EXPR representing the call. */
4793 ix86_function_ok_for_sibcall (tree decl, tree exp)
4795 tree type, decl_or_type;
4798 /* If we are generating position-independent code, we cannot sibcall
4799 optimize any indirect call, or a direct call to a global function,
4800 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4804 && (!decl || !targetm.binds_local_p (decl)))
4807 /* If we need to align the outgoing stack, then sibcalling would
4808 unalign the stack, which may break the called function. */
4809 if (ix86_minimum_incoming_stack_boundary (true)
4810 < PREFERRED_STACK_BOUNDARY)
4815 decl_or_type = decl;
4816 type = TREE_TYPE (decl);
4820 /* We're looking at the CALL_EXPR, we need the type of the function. */
4821 type = CALL_EXPR_FN (exp); /* pointer expression */
4822 type = TREE_TYPE (type); /* pointer type */
4823 type = TREE_TYPE (type); /* function type */
4824 decl_or_type = type;
4827 /* Check that the return value locations are the same. For example,
4828 if we are returning floats on the 80387 register stack, we cannot
4829 make a sibcall from a function that doesn't return a float to a
4830 function that does or, conversely, from a function that does return
4831 a float to a function that doesn't; the necessary stack adjustment
4832 would not be executed. This is also the place we notice
4833 differences in the return value ABI. Note that it is ok for one
4834 of the functions to have void return type as long as the return
4835 value of the other is passed in a register. */
4836 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4837 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4839 if (STACK_REG_P (a) || STACK_REG_P (b))
4841 if (!rtx_equal_p (a, b))
4844 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4846 /* Disable sibcall if we need to generate vzeroupper after the callee returns. */
4848 if (TARGET_VZEROUPPER
4849 && cfun->machine->callee_return_avx256_p
4850 && !cfun->machine->caller_return_avx256_p)
4853 else if (!rtx_equal_p (a, b))
4858 /* The SYSV ABI has more call-clobbered registers;
4859 disallow sibcalls from MS to SYSV. */
4860 if (cfun->machine->call_abi == MS_ABI
4861 && ix86_function_type_abi (type) == SYSV_ABI)
4866 /* If this call is indirect, we'll need to be able to use a
4867 call-clobbered register for the address of the target function.
4868 Make sure that all such registers are not used for passing
4869 parameters. Note that DLLIMPORT functions are indirect. */
4871 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4873 if (ix86_function_regparm (type, NULL) >= 3)
4875 /* ??? Need to count the actual number of registers to be used,
4876 not the possible number of registers. Fix later. */
4882 /* Otherwise okay. That also includes certain types of indirect calls. */
4886 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4887 and "sseregparm" calling convention attributes;
4888 arguments as in struct attribute_spec.handler. */
4891 ix86_handle_cconv_attribute (tree *node, tree name,
4893 int flags ATTRIBUTE_UNUSED,
4896 if (TREE_CODE (*node) != FUNCTION_TYPE
4897 && TREE_CODE (*node) != METHOD_TYPE
4898 && TREE_CODE (*node) != FIELD_DECL
4899 && TREE_CODE (*node) != TYPE_DECL)
4901 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4903 *no_add_attrs = true;
4907 /* Can combine regparm with all attributes but fastcall and thiscall. */
4908 if (is_attribute_p ("regparm", name))
4912 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4914 error ("fastcall and regparm attributes are not compatible");
4917 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4919 error ("regparm and thiscall attributes are not compatible");
4922 cst = TREE_VALUE (args);
4923 if (TREE_CODE (cst) != INTEGER_CST)
4925 warning (OPT_Wattributes,
4926 "%qE attribute requires an integer constant argument",
4928 *no_add_attrs = true;
4930 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4932 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4934 *no_add_attrs = true;
4942 /* Do not warn when emulating the MS ABI. */
4943 if ((TREE_CODE (*node) != FUNCTION_TYPE
4944 && TREE_CODE (*node) != METHOD_TYPE)
4945 || ix86_function_type_abi (*node) != MS_ABI)
4946 warning (OPT_Wattributes, "%qE attribute ignored",
4948 *no_add_attrs = true;
4952 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4953 if (is_attribute_p ("fastcall", name))
4955 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4957 error ("fastcall and cdecl attributes are not compatible");
4959 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4961 error ("fastcall and stdcall attributes are not compatible");
4963 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4965 error ("fastcall and regparm attributes are not compatible");
4967 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4969 error ("fastcall and thiscall attributes are not compatible");
4973 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
4975 else if (is_attribute_p ("stdcall", name))
4977 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4979 error ("stdcall and cdecl attributes are not compatible");
4981 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4983 error ("stdcall and fastcall attributes are not compatible");
4985 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4987 error ("stdcall and thiscall attributes are not compatible");
4991 /* Can combine cdecl with regparm and sseregparm. */
4992 else if (is_attribute_p ("cdecl", name))
4994 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4996 error ("stdcall and cdecl attributes are not compatible");
4998 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5000 error ("fastcall and cdecl attributes are not compatible");
5002 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5004 error ("cdecl and thiscall attributes are not compatible");
5007 else if (is_attribute_p ("thiscall", name))
5009 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5010 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5012 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5014 error ("stdcall and thiscall attributes are not compatible");
5016 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5018 error ("fastcall and thiscall attributes are not compatible");
5020 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5022 error ("cdecl and thiscall attributes are not compatible");
5026 /* Can combine sseregparm with all attributes. */
5031 /* This function determines the calling convention from TYPE. */
5034 ix86_get_callcvt (const_tree type)
5036 unsigned int ret = 0;
5041 return IX86_CALLCVT_CDECL;
5043 attrs = TYPE_ATTRIBUTES (type);
5044 if (attrs != NULL_TREE)
5046 if (lookup_attribute ("cdecl", attrs))
5047 ret |= IX86_CALLCVT_CDECL;
5048 else if (lookup_attribute ("stdcall", attrs))
5049 ret |= IX86_CALLCVT_STDCALL;
5050 else if (lookup_attribute ("fastcall", attrs))
5051 ret |= IX86_CALLCVT_FASTCALL;
5052 else if (lookup_attribute ("thiscall", attrs))
5053 ret |= IX86_CALLCVT_THISCALL;
5055 /* Regparm isn't allowed for thiscall and fastcall. */
5056 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5058 if (lookup_attribute ("regparm", attrs))
5059 ret |= IX86_CALLCVT_REGPARM;
5060 if (lookup_attribute ("sseregparm", attrs))
5061 ret |= IX86_CALLCVT_SSEREGPARM;
5064 if (IX86_BASE_CALLCVT(ret) != 0)
5068 is_stdarg = stdarg_p (type);
5069 if (TARGET_RTD && !is_stdarg)
5070 return IX86_CALLCVT_STDCALL | ret;
5074 || TREE_CODE (type) != METHOD_TYPE
5075 || ix86_function_type_abi (type) != MS_ABI)
5076 return IX86_CALLCVT_CDECL | ret;
5078 return IX86_CALLCVT_THISCALL;
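/* The derivation above has a fixed precedence: an explicit attribute
   wins, otherwise -mrtd makes non-variadic functions stdcall,
   otherwise the default is cdecl (with thiscall for 32-bit MS-ABI
   methods).  The decision reduced to a toy function, with invented
   names: */
#if 0 /* Illustrative sketch only.  */
#include <stdio.h>

enum cc { CC_CDECL, CC_STDCALL };

static enum cc
pick_callcvt (int has_attribute, enum cc attribute_cc,
              int target_rtd, int is_stdarg)
{
  if (has_attribute)
    return attribute_cc;   /* an explicit attribute always wins */
  if (target_rtd && !is_stdarg)
    return CC_STDCALL;     /* -mrtd default, never for varargs */
  return CC_CDECL;         /* the fallback */
}

int
main (void)
{
  /* Under -mrtd a variadic function must still be cdecl.  */
  printf ("%d\n", pick_callcvt (0, CC_CDECL, 1, 1));   /* 0 */
  printf ("%d\n", pick_callcvt (0, CC_CDECL, 1, 0));   /* 1 */
  return 0;
}
#endif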
5081 /* Return 0 if the attributes for two types are incompatible, 1 if they
5082 are compatible, and 2 if they are nearly compatible (which causes a
5083 warning to be generated). */
5086 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5088 unsigned int ccvt1, ccvt2;
5090 if (TREE_CODE (type1) != FUNCTION_TYPE
5091 && TREE_CODE (type1) != METHOD_TYPE)
5094 ccvt1 = ix86_get_callcvt (type1);
5095 ccvt2 = ix86_get_callcvt (type2);
5098 if (ix86_function_regparm (type1, NULL)
5099 != ix86_function_regparm (type2, NULL))
5105 /* Return the regparm value for a function with the indicated TYPE and DECL.
5106 DECL may be NULL when calling function indirectly
5107 or considering a libcall. */
5110 ix86_function_regparm (const_tree type, const_tree decl)
5117 return (ix86_function_type_abi (type) == SYSV_ABI
5118 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5119 ccvt = ix86_get_callcvt (type);
5120 regparm = ix86_regparm;
5122 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5124 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5127 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5131 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5133 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5136 /* Use register calling convention for local functions when possible. */
5138 && TREE_CODE (decl) == FUNCTION_DECL
5140 && !(profile_flag && !flag_fentry))
5142 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5143 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5144 if (i && i->local && i->can_change_signature)
5146 int local_regparm, globals = 0, regno;
5148 /* Make sure no regparm register is taken by a
5149 fixed register variable. */
5150 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5151 if (fixed_regs[local_regparm])
5154 /* We don't want to use regparm(3) for nested functions as
5155 these use a static chain pointer in the third argument. */
5156 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5159 /* In 32-bit mode save a register for the split stack. */
5160 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5163 /* Each fixed register usage increases register pressure,
5164 so fewer registers should be used for argument passing.
5165 This functionality can be overridden by an explicit regparm value. */
5167 for (regno = 0; regno <= DI_REG; regno++)
5168 if (fixed_regs[regno])
5172 = globals < local_regparm ? local_regparm - globals : 0;
5174 if (local_regparm > regparm)
5175 regparm = local_regparm;
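/* The local-function computation above stops at the first parameter
   register claimed by a fixed register variable, then subtracts one
   slot per fixed integer register to account for the extra pressure.
   The arithmetic by itself, over an invented register file: */
#if 0 /* Illustrative sketch only.  */
#include <stdio.h>

/* FIXED[0..NINT-1] flags fixed registers; the first NPARM of them are
   the parameter-passing registers.  Return how many parameter
   registers a local function may use.  */
static int
local_regparm_limit (const int *fixed, int nparm, int nint)
{
  int limit, globals = 0, regno;

  /* Make sure no parameter register is taken by a fixed register
     variable.  */
  for (limit = 0; limit < nparm; limit++)
    if (fixed[limit])
      break;

  /* Each fixed register raises register pressure, so hand out fewer
     registers for argument passing.  */
  for (regno = 0; regno < nint; regno++)
    if (fixed[regno])
      globals++;

  return globals < limit ? limit - globals : 0;
}

int
main (void)
{
  int fixed[8] = { 0 };

  fixed[5] = 1;   /* e.g. one register pinned globally */
  printf ("%d\n", local_regparm_limit (fixed, 3, 8));   /* 3 - 1 = 2 */
  return 0;
}
#endif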
5182 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5183 DFmode (2) arguments in SSE registers for a function with the
5184 indicated TYPE and DECL. DECL may be NULL when calling function
5185 indirectly or considering a libcall. Otherwise return 0. */
5188 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5190 gcc_assert (!TARGET_64BIT);
5192 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5193 by the sseregparm attribute. */
5194 if (TARGET_SSEREGPARM
5195 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5202 error ("calling %qD with attribute sseregparm without "
5203 "SSE/SSE2 enabled", decl);
5205 error ("calling %qT with attribute sseregparm without "
5206 "SSE/SSE2 enabled", type);
5214 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5215 (and DFmode for SSE2) arguments in SSE registers. */
5216 if (decl && TARGET_SSE_MATH && optimize
5217 && !(profile_flag && !flag_fentry))
5219 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5220 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5221 if (i && i->local && i->can_change_signature)
5222 return TARGET_SSE2 ? 2 : 1;
5228 /* Return true if EAX is live at the start of the function. Used by
5229 ix86_expand_prologue to determine if we need special help before
5230 calling allocate_stack_worker. */
5233 ix86_eax_live_at_start_p (void)
5235 /* Cheat. Don't bother working forward from ix86_function_regparm
5236 to the function type to whether an actual argument is located in
5237 eax. Instead just look at cfg info, which is still close enough
5238 to correct at this point. This gives false positives for broken
5239 functions that might use uninitialized data that happens to be
5240 allocated in eax, but who cares? */
5241 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5245 ix86_keep_aggregate_return_pointer (tree fntype)
5251 attr = lookup_attribute ("callee_pop_aggregate_return",
5252 TYPE_ATTRIBUTES (fntype));
5254 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5256 /* For 32-bit MS-ABI the default is to keep the aggregate return pointer. */
5258 if (ix86_function_type_abi (fntype) == MS_ABI)
5261 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5264 /* Value is the number of bytes of arguments automatically
5265 popped when returning from a subroutine call.
5266 FUNDECL is the declaration node of the function (as a tree),
5267 FUNTYPE is the data type of the function (as a tree),
5268 or for a library call it is an identifier node for the subroutine name.
5269 SIZE is the number of bytes of arguments passed on the stack.
5271 On the 80386, the RTD insn may be used to pop them if the number
5272 of args is fixed, but if the number is variable then the caller
5273 must pop them all. RTD can't be used for library calls now
5274 because the library is compiled with the Unix compiler.
5275 Use of RTD is a selectable option, since it is incompatible with
5276 standard Unix calling sequences. If the option is not selected,
5277 the caller must always pop the args.
5279 The attribute stdcall is equivalent to RTD on a per module basis. */
5282 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5286 /* None of the 64-bit ABIs pop arguments. */
5290 ccvt = ix86_get_callcvt (funtype);
5292 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5293 | IX86_CALLCVT_THISCALL)) != 0
5294 && ! stdarg_p (funtype))
5297 /* Lose any fake structure return argument if it is passed on the stack. */
5298 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5299 && !ix86_keep_aggregate_return_pointer (funtype))
5301 int nregs = ix86_function_regparm (funtype, fundecl);
5303 return GET_MODE_SIZE (Pmode);
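/* The rule above in one function: callee-pop conventions
   (stdcall/fastcall/thiscall, or -mrtd) pop all SIZE bytes unless the
   function is variadic; otherwise at most the stack-passed hidden
   aggregate-return pointer (one word) is popped.  A hedged sketch with
   invented parameters: */
#if 0 /* Illustrative sketch only.  */
#include <stdio.h>

#define WORD_SIZE 4   /* 32-bit mode; the 64-bit ABIs never pop */

static int
return_pops_args_sketch (int callee_pop_cc, int is_stdarg,
                         int pops_aggregate_pointer, int size)
{
  if (callee_pop_cc && !is_stdarg)
    return size;                /* RTD-style: pop everything */
  if (pops_aggregate_pointer)
    return WORD_SIZE;           /* just the hidden pointer */
  return 0;                     /* caller pops */
}

int
main (void)
{
  printf ("%d\n", return_pops_args_sketch (1, 0, 0, 12));   /* 12 */
  printf ("%d\n", return_pops_args_sketch (1, 1, 0, 12));   /* 0: varargs */
  return 0;
}
#endif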
5309 /* Argument support functions. */
5311 /* Return true when register REGNO may be used to pass function parameters. */
5313 ix86_function_arg_regno_p (int regno)
5316 const int *parm_regs;
5321 return (regno < REGPARM_MAX
5322 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5324 return (regno < REGPARM_MAX
5325 || (TARGET_MMX && MMX_REGNO_P (regno)
5326 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5327 || (TARGET_SSE && SSE_REGNO_P (regno)
5328 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5333 if (SSE_REGNO_P (regno) && TARGET_SSE)
5338 if (TARGET_SSE && SSE_REGNO_P (regno)
5339 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5343 /* TODO: The function should depend on current function ABI but
5344 builtins.c would need updating then. Therefore we use the default ABI. */
5347 /* RAX is used as hidden argument to va_arg functions. */
5348 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5351 if (ix86_abi == MS_ABI)
5352 parm_regs = x86_64_ms_abi_int_parameter_registers;
5354 parm_regs = x86_64_int_parameter_registers;
5355 for (i = 0; i < (ix86_abi == MS_ABI
5356 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5357 if (regno == parm_regs[i])
5362 /* Return true if we do not know how to pass TYPE solely in registers. */
5365 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5367 if (must_pass_in_stack_var_size_or_pad (mode, type))
5370 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5371 The layout_type routine is crafty and tries to trick us into passing
5372 currently unsupported vector types on the stack by using TImode. */
5373 return (!TARGET_64BIT && mode == TImode
5374 && type && TREE_CODE (type) != VECTOR_TYPE);
5377 /* Return the size, in bytes, of the area reserved for arguments passed
5378 in registers for the function represented by FNDECL, depending on the ABI used. */
5381 ix86_reg_parm_stack_space (const_tree fndecl)
5383 enum calling_abi call_abi = SYSV_ABI;
5384 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5385 call_abi = ix86_function_abi (fndecl);
5387 call_abi = ix86_function_type_abi (fndecl);
5388 if (TARGET_64BIT && call_abi == MS_ABI)
5393 /* Returns SYSV_ABI or MS_ABI, dependent on fntype, specifying the call abi used. */
5396 ix86_function_type_abi (const_tree fntype)
5398 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5400 enum calling_abi abi = ix86_abi;
5401 if (abi == SYSV_ABI)
5403 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5406 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5414 ix86_function_ms_hook_prologue (const_tree fn)
5416 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5418 if (decl_function_context (fn) != NULL_TREE)
5419 error_at (DECL_SOURCE_LOCATION (fn),
5420 "ms_hook_prologue is not compatible with nested function");
5427 static enum calling_abi
5428 ix86_function_abi (const_tree fndecl)
5432 return ix86_function_type_abi (TREE_TYPE (fndecl));
5435 /* Returns SYSV_ABI or MS_ABI, dependent on cfun, specifying the call abi used. */
5438 ix86_cfun_abi (void)
5442 return cfun->machine->call_abi;
5445 /* Write the extra assembler code needed to declare a function properly. */
5448 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5451 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5455 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5456 unsigned int filler_cc = 0xcccccccc;
5458 for (i = 0; i < filler_count; i += 4)
5459 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5462 #ifdef SUBTARGET_ASM_UNWIND_INIT
5463 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5466 ASM_OUTPUT_LABEL (asm_out_file, fname);
5468 /* Output magic byte marker, if hot-patch attribute is set. */
5473 /* leaq [%rsp + 0], %rsp */
5474 asm_fprintf (asm_out_file, ASM_BYTE
5475 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5479 /* movl.s %edi, %edi
5481 movl.s %esp, %ebp */
5482 asm_fprintf (asm_out_file, ASM_BYTE
5483 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5489 extern void init_regs (void);
5491 /* Implementation of the call abi switching target hook. The call
5492 register sets specific to FNDECL are set up. See also
5493 ix86_conditional_register_usage for more details. */
5495 ix86_call_abi_override (const_tree fndecl)
5497 if (fndecl == NULL_TREE)
5498 cfun->machine->call_abi = ix86_abi;
5500 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5503 /* 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
5504 expensive re-initialization of init_regs each time we switch function context
5505 since this is needed only during RTL expansion. */
5507 ix86_maybe_switch_abi (void)
5510 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5514 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5515 for a call to a function whose data type is FNTYPE.
5516 For a library call, FNTYPE is 0. */
5519 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5520 tree fntype, /* tree ptr for function decl */
5521 rtx libname, /* SYMBOL_REF of library name or 0 */
5525 struct cgraph_local_info *i;
5528 memset (cum, 0, sizeof (*cum));
5530 /* Initialize for the current callee. */
5533 cfun->machine->callee_pass_avx256_p = false;
5534 cfun->machine->callee_return_avx256_p = false;
5539 i = cgraph_local_info (fndecl);
5540 cum->call_abi = ix86_function_abi (fndecl);
5541 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5546 cum->call_abi = ix86_function_type_abi (fntype);
5548 fnret_type = TREE_TYPE (fntype);
5553 if (TARGET_VZEROUPPER && fnret_type)
5555 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5557 if (function_pass_avx256_p (fnret_value))
5559 /* The return value of this function uses 256bit AVX modes. */
5561 cfun->machine->callee_return_avx256_p = true;
5563 cfun->machine->caller_return_avx256_p = true;
5567 cum->caller = caller;
5569 /* Set up the number of registers to use for passing arguments. */
5571 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5572 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5573 "or subtarget optimization implying it");
5574 cum->nregs = ix86_regparm;
5577 cum->nregs = (cum->call_abi == SYSV_ABI
5578 ? X86_64_REGPARM_MAX
5579 : X86_64_MS_REGPARM_MAX);
5583 cum->sse_nregs = SSE_REGPARM_MAX;
5586 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5587 ? X86_64_SSE_REGPARM_MAX
5588 : X86_64_MS_SSE_REGPARM_MAX);
5592 cum->mmx_nregs = MMX_REGPARM_MAX;
5593 cum->warn_avx = true;
5594 cum->warn_sse = true;
5595 cum->warn_mmx = true;
5597 /* Because types might mismatch between caller and callee, we need to
5598 use the actual type of the function for local calls.
5599 FIXME: cgraph_analyze can be told to actually record if a function uses
5600 va_start, so for local functions maybe_vaarg can be made more aggressive.
5602 FIXME: once the type system is fixed, we won't need this code anymore. */
5603 if (i && i->local && i->can_change_signature)
5604 fntype = TREE_TYPE (fndecl);
5605 cum->maybe_vaarg = (fntype
5606 ? (!prototype_p (fntype) || stdarg_p (fntype))
5611 /* If there are variable arguments, then we won't pass anything
5612 in registers in 32-bit mode. */
5613 if (stdarg_p (fntype))
5624 /* Use ecx and edx registers if function has fastcall attribute,
5625 else look for regparm information. */
5628 unsigned int ccvt = ix86_get_callcvt (fntype);
5629 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5632 cum->fastcall = 1; /* Same first register as in fastcall. */
5634 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5640 cum->nregs = ix86_function_regparm (fntype, fndecl);
5643 /* Set up the number of SSE registers used for passing SFmode
5644 and DFmode arguments. Warn for mismatching ABI. */
5645 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
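/* Illustrative example (not part of the original source): on 32-bit
   targets the register-passing setup above can be requested per
   function:

       int __attribute__ ((fastcall)) f (int a, int b);
       int __attribute__ ((regparm (3))) g (int a, int b, int c);

   fastcall places the first two DWORD-or-smaller arguments in %ecx
   and %edx; regparm (3) uses %eax, %edx and %ecx.  */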
5649 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5650 But in the case of vector types, it is some vector mode.
5652 When we have only some of our vector isa extensions enabled, then there
5653 are some modes for which vector_mode_supported_p is false. For these
5654 modes, the generic vector support in gcc will choose some non-vector mode
5655 in order to implement the type. By computing the natural mode, we'll
5656 select the proper ABI location for the operand and not depend on whatever
5657 the middle-end decides to do with these vector types.
5659 The middle-end can't deal with vector types larger than 16 bytes.  In this
5660 case, we return the original mode and warn about the ABI change if CUM isn't
5661 NULL. */
5663 static enum machine_mode
5664 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5666 enum machine_mode mode = TYPE_MODE (type);
5668 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5670 HOST_WIDE_INT size = int_size_in_bytes (type);
5671 if ((size == 8 || size == 16 || size == 32)
5672 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5673 && TYPE_VECTOR_SUBPARTS (type) > 1)
5675 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5677 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5678 mode = MIN_MODE_VECTOR_FLOAT;
5680 mode = MIN_MODE_VECTOR_INT;
5682 /* Get the mode which has this inner mode and number of units. */
5683 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5684 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5685 && GET_MODE_INNER (mode) == innermode)
5687 if (size == 32 && !TARGET_AVX)
5689 static bool warnedavx;
5696 warning (0, "AVX vector argument without AVX "
5697 "enabled changes the ABI");
5699 return TYPE_MODE (type);
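/* Sketch of the effect (illustrative, not from the original source):
   for

       typedef int v4si __attribute__ ((vector_size (16)));

   the natural mode computed above is V4SImode even when SSE is
   disabled and the middle-end would otherwise fall back to a
   non-vector mode, so the psABI slot assignment stays stable across
   -m flags.  */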
5712 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5713 this may not agree with the mode that the type system has chosen for the
5714 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5715 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5718 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5723 if (orig_mode != BLKmode)
5724 tmp = gen_rtx_REG (orig_mode, regno);
5727 tmp = gen_rtx_REG (mode, regno);
5728 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5729 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5735 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5736 of this code is to classify each 8bytes of incoming argument by the register
5737 class and assign registers accordingly. */
5739 /* Return the union class of CLASS1 and CLASS2.
5740 See the x86-64 PS ABI for details. */
5742 static enum x86_64_reg_class
5743 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5745 /* Rule #1: If both classes are equal, this is the resulting class. */
5746 if (class1 == class2)
5749 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5751 if (class1 == X86_64_NO_CLASS)
5753 if (class2 == X86_64_NO_CLASS)
5756 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5757 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5758 return X86_64_MEMORY_CLASS;
5760 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5761 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5762 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5763 return X86_64_INTEGERSI_CLASS;
5764 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5765 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5766 return X86_64_INTEGER_CLASS;
5768 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5770 if (class1 == X86_64_X87_CLASS
5771 || class1 == X86_64_X87UP_CLASS
5772 || class1 == X86_64_COMPLEX_X87_CLASS
5773 || class2 == X86_64_X87_CLASS
5774 || class2 == X86_64_X87UP_CLASS
5775 || class2 == X86_64_COMPLEX_X87_CLASS)
5776 return X86_64_MEMORY_CLASS;
5778 /* Rule #6: Otherwise class SSE is used. */
5779 return X86_64_SSE_CLASS;
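/* Worked example (illustrative): for

       struct s { int i; float f; };

   both fields land in the first eightbyte; the int classifies it as
   INTEGERSI and the float as SSE, and rule #4 merges them to INTEGER,
   so the whole struct is passed in one general-purpose register.  */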
5782 /* Classify the argument of type TYPE and mode MODE.
5783 CLASSES will be filled by the register class used to pass each word
5784 of the operand. The number of words is returned. In case the parameter
5785 should be passed in memory, 0 is returned. As a special case for zero
5786 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5788 BIT_OFFSET is used internally for handling records and specifies the
5789 offset of the argument in bits modulo 256, to avoid overflow cases.
5791 See the x86-64 PS ABI for details.
5795 classify_argument (enum machine_mode mode, const_tree type,
5796 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5798 HOST_WIDE_INT bytes =
5799 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5800 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5802 /* Variable sized entities are always passed/returned in memory. */
5806 if (mode != VOIDmode
5807 && targetm.calls.must_pass_in_stack (mode, type))
5810 if (type && AGGREGATE_TYPE_P (type))
5814 enum x86_64_reg_class subclasses[MAX_CLASSES];
5816 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5820 for (i = 0; i < words; i++)
5821 classes[i] = X86_64_NO_CLASS;
5823 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
5824 signal the memory class, so handle it as a special case. */
5827 classes[0] = X86_64_NO_CLASS;
5831 /* Classify each field of record and merge classes. */
5832 switch (TREE_CODE (type))
5835 /* And now merge the fields of structure. */
5836 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5838 if (TREE_CODE (field) == FIELD_DECL)
5842 if (TREE_TYPE (field) == error_mark_node)
5845 /* Bitfields are always classified as integer. Handle them
5846 early, since later code would consider them to be
5847 misaligned integers. */
5848 if (DECL_BIT_FIELD (field))
5850 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5851 i < ((int_bit_position (field) + (bit_offset % 64))
5852 + tree_low_cst (DECL_SIZE (field), 0)
5855 merge_classes (X86_64_INTEGER_CLASS,
5862 type = TREE_TYPE (field);
5864 /* Flexible array member is ignored. */
5865 if (TYPE_MODE (type) == BLKmode
5866 && TREE_CODE (type) == ARRAY_TYPE
5867 && TYPE_SIZE (type) == NULL_TREE
5868 && TYPE_DOMAIN (type) != NULL_TREE
5869 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5874 if (!warned && warn_psabi)
5877 inform (input_location,
5878 "the ABI of passing struct with"
5879 " a flexible array member has"
5880 " changed in GCC 4.4");
5884 num = classify_argument (TYPE_MODE (type), type,
5886 (int_bit_position (field)
5887 + bit_offset) % 256);
5890 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5891 for (i = 0; i < num && (i + pos) < words; i++)
5893 merge_classes (subclasses[i], classes[i + pos]);
5900 /* Arrays are handled as small records. */
5903 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5904 TREE_TYPE (type), subclasses, bit_offset);
5908 /* The partial classes are now full classes. */
5909 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5910 subclasses[0] = X86_64_SSE_CLASS;
5911 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5912 && !((bit_offset % 64) == 0 && bytes == 4))
5913 subclasses[0] = X86_64_INTEGER_CLASS;
5915 for (i = 0; i < words; i++)
5916 classes[i] = subclasses[i % num];
5921 case QUAL_UNION_TYPE:
5922 /* Unions are similar to RECORD_TYPE but offset is always 0.
5924 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5926 if (TREE_CODE (field) == FIELD_DECL)
5930 if (TREE_TYPE (field) == error_mark_node)
5933 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5934 TREE_TYPE (field), subclasses,
5938 for (i = 0; i < num; i++)
5939 classes[i] = merge_classes (subclasses[i], classes[i]);
5950 /* When size > 16 bytes, if the first eightbyte isn't
5951 X86_64_SSE_CLASS or any other one isn't
5952 X86_64_SSEUP_CLASS, everything should be passed in memory. */
5954 if (classes[0] != X86_64_SSE_CLASS)
5957 for (i = 1; i < words; i++)
5958 if (classes[i] != X86_64_SSEUP_CLASS)
5962 /* Final merger cleanup. */
5963 for (i = 0; i < words; i++)
5965 /* If one class is MEMORY, everything should be passed in memory. */
5967 if (classes[i] == X86_64_MEMORY_CLASS)
5970 /* The X86_64_SSEUP_CLASS should be always preceded by
5971 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5972 if (classes[i] == X86_64_SSEUP_CLASS
5973 && classes[i - 1] != X86_64_SSE_CLASS
5974 && classes[i - 1] != X86_64_SSEUP_CLASS)
5976 /* The first one should never be X86_64_SSEUP_CLASS. */
5977 gcc_assert (i != 0);
5978 classes[i] = X86_64_SSE_CLASS;
5981 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5982 everything should be passed in memory. */
5983 if (classes[i] == X86_64_X87UP_CLASS
5984 && (classes[i - 1] != X86_64_X87_CLASS))
5988 /* The first one should never be X86_64_X87UP_CLASS. */
5989 gcc_assert (i != 0);
5990 if (!warned && warn_psabi)
5993 inform (input_location,
5994 "the ABI of passing union with long double"
5995 " has changed in GCC 4.4");
6003 /* Compute alignment needed.  We align all types to natural boundaries with
6004 the exception of XFmode, which is aligned to 64 bits. */
6005 if (mode != VOIDmode && mode != BLKmode)
6007 int mode_alignment = GET_MODE_BITSIZE (mode);
6010 mode_alignment = 128;
6011 else if (mode == XCmode)
6012 mode_alignment = 256;
6013 if (COMPLEX_MODE_P (mode))
6014 mode_alignment /= 2;
6015 /* Misaligned fields are always returned in memory. */
6016 if (bit_offset % mode_alignment)
6020 /* For V1xx modes, just use the base mode.  */
6021 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6022 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6023 mode = GET_MODE_INNER (mode);
6025 /* Classification of atomic types. */
6030 classes[0] = X86_64_SSE_CLASS;
6033 classes[0] = X86_64_SSE_CLASS;
6034 classes[1] = X86_64_SSEUP_CLASS;
6044 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6048 classes[0] = X86_64_INTEGERSI_CLASS;
6051 else if (size <= 64)
6053 classes[0] = X86_64_INTEGER_CLASS;
6056 else if (size <= 64+32)
6058 classes[0] = X86_64_INTEGER_CLASS;
6059 classes[1] = X86_64_INTEGERSI_CLASS;
6062 else if (size <= 64+64)
6064 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6072 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6076 /* OImode shouldn't be used directly. */
6081 if (!(bit_offset % 64))
6082 classes[0] = X86_64_SSESF_CLASS;
6084 classes[0] = X86_64_SSE_CLASS;
6087 classes[0] = X86_64_SSEDF_CLASS;
6090 classes[0] = X86_64_X87_CLASS;
6091 classes[1] = X86_64_X87UP_CLASS;
6094 classes[0] = X86_64_SSE_CLASS;
6095 classes[1] = X86_64_SSEUP_CLASS;
6098 classes[0] = X86_64_SSE_CLASS;
6099 if (!(bit_offset % 64))
6105 if (!warned && warn_psabi)
6108 inform (input_location,
6109 "the ABI of passing structure with complex float"
6110 " member has changed in GCC 4.4");
6112 classes[1] = X86_64_SSESF_CLASS;
6116 classes[0] = X86_64_SSEDF_CLASS;
6117 classes[1] = X86_64_SSEDF_CLASS;
6120 classes[0] = X86_64_COMPLEX_X87_CLASS;
6123 /* This mode is larger than 16 bytes. */
6131 classes[0] = X86_64_SSE_CLASS;
6132 classes[1] = X86_64_SSEUP_CLASS;
6133 classes[2] = X86_64_SSEUP_CLASS;
6134 classes[3] = X86_64_SSEUP_CLASS;
6142 classes[0] = X86_64_SSE_CLASS;
6143 classes[1] = X86_64_SSEUP_CLASS;
6151 classes[0] = X86_64_SSE_CLASS;
6157 gcc_assert (VECTOR_MODE_P (mode));
6162 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6164 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6165 classes[0] = X86_64_INTEGERSI_CLASS;
6167 classes[0] = X86_64_INTEGER_CLASS;
6168 classes[1] = X86_64_INTEGER_CLASS;
6169 return 1 + (bytes > 8);
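/* Illustrative summary of the vector cases above (added note): a
   16-byte vector such as V4SFmode classifies as { SSE, SSEUP } and a
   32-byte AVX vector such as V8SFmode as { SSE, SSEUP, SSEUP, SSEUP },
   i.e. one %xmm or one %ymm register respectively.  */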
6173 /* Examine the argument and return the number of registers required in each
6174 class.  Return 0 iff the parameter should be passed in memory. */
6176 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6177 int *int_nregs, int *sse_nregs)
6179 enum x86_64_reg_class regclass[MAX_CLASSES];
6180 int n = classify_argument (mode, type, regclass, 0);
6186 for (n--; n >= 0; n--)
6187 switch (regclass[n])
6189 case X86_64_INTEGER_CLASS:
6190 case X86_64_INTEGERSI_CLASS:
6193 case X86_64_SSE_CLASS:
6194 case X86_64_SSESF_CLASS:
6195 case X86_64_SSEDF_CLASS:
6198 case X86_64_NO_CLASS:
6199 case X86_64_SSEUP_CLASS:
6201 case X86_64_X87_CLASS:
6202 case X86_64_X87UP_CLASS:
6206 case X86_64_COMPLEX_X87_CLASS:
6207 return in_return ? 2 : 0;
6208 case X86_64_MEMORY_CLASS:
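/* Example (illustrative): for struct { int i; float f; } the merged
   class is INTEGER, so examine_argument sets *int_nregs = 1 and
   *sse_nregs = 0; for __int128 both eightbytes are INTEGER and
   *int_nregs = 2.  */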
6214 /* Construct container for the argument used by GCC interface. See
6215 FUNCTION_ARG for the detailed description. */
6218 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6219 const_tree type, int in_return, int nintregs, int nsseregs,
6220 const int *intreg, int sse_regno)
6222 /* The following variables hold the static issued_error state. */
6223 static bool issued_sse_arg_error;
6224 static bool issued_sse_ret_error;
6225 static bool issued_x87_ret_error;
6227 enum machine_mode tmpmode;
6229 int bytes = (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6230 enum x86_64_reg_class regclass[MAX_CLASSES];
6234 int needed_sseregs, needed_intregs;
6235 rtx exp[MAX_CLASSES];
6238 n = classify_argument (mode, type, regclass, 0);
6241 if (!examine_argument (mode, type, in_return, &needed_intregs,
6244 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6247 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6248 some less clueful developer tries to use floating-point anyway. */
6249 if (needed_sseregs && !TARGET_SSE)
6253 if (!issued_sse_ret_error)
6255 error ("SSE register return with SSE disabled");
6256 issued_sse_ret_error = true;
6259 else if (!issued_sse_arg_error)
6261 error ("SSE register argument with SSE disabled");
6262 issued_sse_arg_error = true;
6267 /* Likewise, error if the ABI requires us to return values in the
6268 x87 registers and the user specified -mno-80387. */
6269 if (!TARGET_80387 && in_return)
6270 for (i = 0; i < n; i++)
6271 if (regclass[i] == X86_64_X87_CLASS
6272 || regclass[i] == X86_64_X87UP_CLASS
6273 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6275 if (!issued_x87_ret_error)
6277 error ("x87 register return with x87 disabled");
6278 issued_x87_ret_error = true;
6283 /* First construct simple cases. Avoid SCmode, since we want to use
6284 single register to pass this type. */
6285 if (n == 1 && mode != SCmode)
6286 switch (regclass[0])
6288 case X86_64_INTEGER_CLASS:
6289 case X86_64_INTEGERSI_CLASS:
6290 return gen_rtx_REG (mode, intreg[0]);
6291 case X86_64_SSE_CLASS:
6292 case X86_64_SSESF_CLASS:
6293 case X86_64_SSEDF_CLASS:
6294 if (mode != BLKmode)
6295 return gen_reg_or_parallel (mode, orig_mode,
6296 SSE_REGNO (sse_regno));
6298 case X86_64_X87_CLASS:
6299 case X86_64_COMPLEX_X87_CLASS:
6300 return gen_rtx_REG (mode, FIRST_STACK_REG);
6301 case X86_64_NO_CLASS:
6302 /* Zero sized array, struct or class. */
6307 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6308 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6309 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6311 && regclass[0] == X86_64_SSE_CLASS
6312 && regclass[1] == X86_64_SSEUP_CLASS
6313 && regclass[2] == X86_64_SSEUP_CLASS
6314 && regclass[3] == X86_64_SSEUP_CLASS
6316 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6319 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6320 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6321 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6322 && regclass[1] == X86_64_INTEGER_CLASS
6323 && (mode == CDImode || mode == TImode || mode == TFmode)
6324 && intreg[0] + 1 == intreg[1])
6325 return gen_rtx_REG (mode, intreg[0]);
6327 /* Otherwise figure out the entries of the PARALLEL. */
6328 for (i = 0; i < n; i++)
6332 switch (regclass[i])
6334 case X86_64_NO_CLASS:
6336 case X86_64_INTEGER_CLASS:
6337 case X86_64_INTEGERSI_CLASS:
6338 /* Merge TImodes on aligned occasions here too. */
6339 if (i * 8 + 8 > bytes)
6340 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6341 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6345 /* We've requested 24 bytes for which we don't have a mode.  Use DImode. */
6346 if (tmpmode == BLKmode)
6348 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6349 gen_rtx_REG (tmpmode, *intreg),
6353 case X86_64_SSESF_CLASS:
6354 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6355 gen_rtx_REG (SFmode,
6356 SSE_REGNO (sse_regno)),
6360 case X86_64_SSEDF_CLASS:
6361 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6362 gen_rtx_REG (DFmode,
6363 SSE_REGNO (sse_regno)),
6367 case X86_64_SSE_CLASS:
6375 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6385 && regclass[1] == X86_64_SSEUP_CLASS
6386 && regclass[2] == X86_64_SSEUP_CLASS
6387 && regclass[3] == X86_64_SSEUP_CLASS);
6394 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6395 gen_rtx_REG (tmpmode,
6396 SSE_REGNO (sse_regno)),
6405 /* Empty aligned struct, union or class. */
6409 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6410 for (i = 0; i < nexps; i++)
6411 XVECEXP (ret, 0, i) = exp [i];
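/* Example (illustrative): struct { double d; long l; } classifies as
   { SSEDF, INTEGER }, so the loop above builds a two-entry PARALLEL
   that places the first eightbyte in an SSE register and the second
   in a general-purpose register.  */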
6415 /* Update the data in CUM to advance over an argument of mode MODE
6416 and data type TYPE. (TYPE is null for libcalls where that information
6417 may not be available.) */
6420 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6421 const_tree type, HOST_WIDE_INT bytes,
6422 HOST_WIDE_INT words)
6438 cum->words += words;
6439 cum->nregs -= words;
6440 cum->regno += words;
6442 if (cum->nregs <= 0)
6450 /* OImode shouldn't be used directly. */
6454 if (cum->float_in_sse < 2)
6457 if (cum->float_in_sse < 1)
6474 if (!type || !AGGREGATE_TYPE_P (type))
6476 cum->sse_words += words;
6477 cum->sse_nregs -= 1;
6478 cum->sse_regno += 1;
6479 if (cum->sse_nregs <= 0)
6493 if (!type || !AGGREGATE_TYPE_P (type))
6495 cum->mmx_words += words;
6496 cum->mmx_nregs -= 1;
6497 cum->mmx_regno += 1;
6498 if (cum->mmx_nregs <= 0)
6509 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6510 const_tree type, HOST_WIDE_INT words, bool named)
6512 int int_nregs, sse_nregs;
6514 /* Unnamed 256bit vector mode parameters are passed on stack. */
6515 if (!named && VALID_AVX256_REG_MODE (mode))
6518 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6519 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6521 cum->nregs -= int_nregs;
6522 cum->sse_nregs -= sse_nregs;
6523 cum->regno += int_nregs;
6524 cum->sse_regno += sse_nregs;
6528 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6529 cum->words = (cum->words + align - 1) & ~(align - 1);
6530 cum->words += words;
6535 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6536 HOST_WIDE_INT words)
6538 /* Otherwise, this should be passed indirectly. */
6539 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6541 cum->words += words;
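/* Note (illustrative): in the Microsoft x64 convention every argument,
   integer or floating-point, consumes one of the four register slots,
   so integer and SSE registers are not counted separately here.  */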
6549 /* Update the data in CUM to advance over an argument of mode MODE and
6550 data type TYPE. (TYPE is null for libcalls where that information
6551 may not be available.) */
6554 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6555 const_tree type, bool named)
6557 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6558 HOST_WIDE_INT bytes, words;
6560 if (mode == BLKmode)
6561 bytes = int_size_in_bytes (type);
6563 bytes = GET_MODE_SIZE (mode);
6564 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6567 mode = type_natural_mode (type, NULL);
6569 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6570 function_arg_advance_ms_64 (cum, bytes, words);
6571 else if (TARGET_64BIT)
6572 function_arg_advance_64 (cum, mode, type, words, named);
6574 function_arg_advance_32 (cum, mode, type, bytes, words);
6577 /* Define where to put the arguments to a function.
6578 Value is zero to push the argument on the stack,
6579 or a hard register in which to store the argument.
6581 MODE is the argument's machine mode.
6582 TYPE is the data type of the argument (as a tree).
6583 This is null for libcalls where that information may
6585 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6586 the preceding args and about the function being called.
6587 NAMED is nonzero if this argument is a named parameter
6588 (otherwise it is an extra parameter matching an ellipsis). */
6591 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6592 enum machine_mode orig_mode, const_tree type,
6593 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6595 static bool warnedsse, warnedmmx;
6597 /* Avoid the AL settings for the Unix64 ABI. */
6598 if (mode == VOIDmode)
6614 if (words <= cum->nregs)
6616 int regno = cum->regno;
6618 /* Fastcall allocates the first two DWORD (SImode) or smaller
6619 arguments to ECX and EDX if it isn't an aggregate type. */
6625 || (type && AGGREGATE_TYPE_P (type)))
6628 /* ECX, not EAX, is the first allocated register. */
6629 if (regno == AX_REG)
6632 return gen_rtx_REG (mode, regno);
6637 if (cum->float_in_sse < 2)
6640 if (cum->float_in_sse < 1)
6644 /* In 32bit, we pass TImode in xmm registers. */
6651 if (!type || !AGGREGATE_TYPE_P (type))
6653 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6656 warning (0, "SSE vector argument without SSE enabled "
6660 return gen_reg_or_parallel (mode, orig_mode,
6661 cum->sse_regno + FIRST_SSE_REG);
6666 /* OImode shouldn't be used directly. */
6675 if (!type || !AGGREGATE_TYPE_P (type))
6678 return gen_reg_or_parallel (mode, orig_mode,
6679 cum->sse_regno + FIRST_SSE_REG);
6689 if (!type || !AGGREGATE_TYPE_P (type))
6691 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6694 warning (0, "MMX vector argument without MMX enabled "
6698 return gen_reg_or_parallel (mode, orig_mode,
6699 cum->mmx_regno + FIRST_MMX_REG);
6708 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6709 enum machine_mode orig_mode, const_tree type, bool named)
6711 /* Handle a hidden AL argument containing number of registers
6712 for varargs x86-64 functions. */
6713 if (mode == VOIDmode)
6714 return GEN_INT (cum->maybe_vaarg
6715 ? (cum->sse_nregs < 0
6716 ? X86_64_SSE_REGPARM_MAX
6731 /* Unnamed 256bit vector mode parameters are passed on stack. */
6737 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6739 &x86_64_int_parameter_registers [cum->regno],
6744 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6745 enum machine_mode orig_mode, bool named,
6746 HOST_WIDE_INT bytes)
6750 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6751 We use the value -2 to specify that the current function call is MS_ABI. */
6752 if (mode == VOIDmode)
6753 return GEN_INT (-2);
6755 /* If we've run out of registers, it goes on the stack. */
6756 if (cum->nregs == 0)
6759 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6761 /* Only floating point modes are passed in anything but integer regs. */
6762 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6765 regno = cum->regno + FIRST_SSE_REG;
6770 /* Unnamed floating parameters are passed in both the
6771 SSE and integer registers. */
6772 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6773 t2 = gen_rtx_REG (mode, regno);
6774 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6775 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6776 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6779 /* Handle aggregate types passed in a register. */
6780 if (orig_mode == BLKmode)
6782 if (bytes > 0 && bytes <= 8)
6783 mode = (bytes > 4 ? DImode : SImode);
6784 if (mode == BLKmode)
6788 return gen_reg_or_parallel (mode, orig_mode, regno);
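/* Example (illustrative): for a variadic callee under the MS ABI, an
   unnamed double in the second slot comes back from the code above as
   a two-entry PARALLEL naming both %xmm1 and %rdx, mirroring the
   convention that varargs floats are duplicated into the matching
   integer register.  */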
6791 /* Return where to put the arguments to a function.
6792 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6794 MODE is the argument's machine mode. TYPE is the data type of the
6795 argument. It is null for libcalls where that information may not be
6796 available. CUM gives information about the preceding args and about
6797 the function being called. NAMED is nonzero if this argument is a
6798 named parameter (otherwise it is an extra parameter matching an
6802 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6803 const_tree type, bool named)
6805 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6806 enum machine_mode mode = omode;
6807 HOST_WIDE_INT bytes, words;
6810 if (mode == BLKmode)
6811 bytes = int_size_in_bytes (type);
6813 bytes = GET_MODE_SIZE (mode);
6814 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6816 /* To simplify the code below, represent vector types with a vector mode
6817 even if MMX/SSE are not active. */
6818 if (type && TREE_CODE (type) == VECTOR_TYPE)
6819 mode = type_natural_mode (type, cum);
6821 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6822 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6823 else if (TARGET_64BIT)
6824 arg = function_arg_64 (cum, mode, omode, type, named);
6826 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6828 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6830 /* This argument uses 256bit AVX modes. */
6832 cfun->machine->callee_pass_avx256_p = true;
6834 cfun->machine->caller_pass_avx256_p = true;
6840 /* A C expression that indicates when an argument must be passed by
6841 reference. If nonzero for an argument, a copy of that argument is
6842 made in memory and a pointer to the argument is passed instead of
6843 the argument itself. The pointer is passed in whatever way is
6844 appropriate for passing a pointer to that type. */
6847 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6848 enum machine_mode mode ATTRIBUTE_UNUSED,
6849 const_tree type, bool named ATTRIBUTE_UNUSED)
6851 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6853 /* See Windows x64 Software Convention. */
6854 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6856 int msize = (int) GET_MODE_SIZE (mode);
6859 /* Arrays are passed by reference. */
6860 if (TREE_CODE (type) == ARRAY_TYPE)
6863 if (AGGREGATE_TYPE_P (type))
6865 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6866 are passed by reference. */
6867 msize = int_size_in_bytes (type);
6871 /* __m128 is passed by reference. */
6873 case 1: case 2: case 4: case 8:
6879 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
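/* Example (illustrative): under the MS ABI the size check above makes

       void f (__m128 v);

   receive V by reference, since 16 bytes is not 1, 2, 4, or 8 bytes,
   whereas the SysV ABI passes the same argument in %xmm0.  */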
6885 /* Return true when TYPE should be 128bit aligned for 32bit argument
6886 passing ABI. XXX: This function is obsolete and is only used for
6887 checking psABI compatibility with previous versions of GCC. */
6890 ix86_compat_aligned_value_p (const_tree type)
6892 enum machine_mode mode = TYPE_MODE (type);
6893 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6897 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6899 if (TYPE_ALIGN (type) < 128)
6902 if (AGGREGATE_TYPE_P (type))
6904 /* Walk the aggregates recursively. */
6905 switch (TREE_CODE (type))
6909 case QUAL_UNION_TYPE:
6913 /* Walk all the structure fields. */
6914 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6916 if (TREE_CODE (field) == FIELD_DECL
6917 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6924 /* Just for use if some languages pass arrays by value. */
6925 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6936 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6937 XXX: This function is obsolete and is only used for checking psABI
6938 compatibility with previous versions of GCC. */
6941 ix86_compat_function_arg_boundary (enum machine_mode mode,
6942 const_tree type, unsigned int align)
6944 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6945 natural boundaries. */
6946 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6948 /* The i386 ABI defines all arguments to be 4 byte aligned.  We have to
6949 make an exception for SSE modes since these require 128bit alignment.
6952 The handling here differs from field_alignment. ICC aligns MMX
6953 arguments to 4 byte boundaries, while structure fields are aligned
6954 to 8 byte boundaries. */
6957 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6958 align = PARM_BOUNDARY;
6962 if (!ix86_compat_aligned_value_p (type))
6963 align = PARM_BOUNDARY;
6966 if (align > BIGGEST_ALIGNMENT)
6967 align = BIGGEST_ALIGNMENT;
6971 /* Return true when TYPE should be 128bit aligned for 32bit argument
6975 ix86_contains_aligned_value_p (const_tree type)
6977 enum machine_mode mode = TYPE_MODE (type);
6979 if (mode == XFmode || mode == XCmode)
6982 if (TYPE_ALIGN (type) < 128)
6985 if (AGGREGATE_TYPE_P (type))
6987 /* Walk the aggregates recursively. */
6988 switch (TREE_CODE (type))
6992 case QUAL_UNION_TYPE:
6996 /* Walk all the structure fields. */
6997 for (field = TYPE_FIELDS (type);
6999 field = DECL_CHAIN (field))
7001 if (TREE_CODE (field) == FIELD_DECL
7002 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7009 /* Just for use if some languages pass arrays by value. */
7010 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7019 return TYPE_ALIGN (type) >= 128;
7024 /* Gives the alignment boundary, in bits, of an argument with the
7025 specified mode and type. */
7028 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7033 /* Since the main variant type is used for the call, we convert
7034 the type to its main variant. */
7035 type = TYPE_MAIN_VARIANT (type);
7036 align = TYPE_ALIGN (type);
7039 align = GET_MODE_ALIGNMENT (mode);
7040 if (align < PARM_BOUNDARY)
7041 align = PARM_BOUNDARY;
7045 unsigned int saved_align = align;
7049 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7052 if (mode == XFmode || mode == XCmode)
7053 align = PARM_BOUNDARY;
7055 else if (!ix86_contains_aligned_value_p (type))
7056 align = PARM_BOUNDARY;
7059 align = PARM_BOUNDARY;
7064 && align != ix86_compat_function_arg_boundary (mode, type,
7068 inform (input_location,
7069 "The ABI for passing parameters with %d-byte"
7070 " alignment has changed in GCC 4.6",
7071 align / BITS_PER_UNIT);
7078 /* Return true if N is a possible register number of function value. */
7081 ix86_function_value_regno_p (const unsigned int regno)
7088 case FIRST_FLOAT_REG:
7089 /* TODO: The function should depend on the current function ABI, but
7090 builtins.c would need updating then.  Therefore we use the default ABI. */
7092 if (TARGET_64BIT && ix86_abi == MS_ABI)
7094 return TARGET_FLOAT_RETURNS_IN_80387;
7100 if (TARGET_MACHO || TARGET_64BIT)
7108 /* Define how to find the value returned by a function.
7109 VALTYPE is the data type of the value (as a tree).
7110 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7111 otherwise, FUNC is 0. */
7114 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7115 const_tree fntype, const_tree fn)
7119 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7120 we normally prevent this case when mmx is not available. However
7121 some ABIs may require the result to be returned like DImode. */
7122 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7123 regno = FIRST_MMX_REG;
7125 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7126 we prevent this case when sse is not available. However some ABIs
7127 may require the result to be returned like integer TImode. */
7128 else if (mode == TImode
7129 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7130 regno = FIRST_SSE_REG;
7132 /* 32-byte vector modes in %ymm0. */
7133 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7134 regno = FIRST_SSE_REG;
7136 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7137 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7138 regno = FIRST_FLOAT_REG;
7140 /* Most things go in %eax. */
7143 /* Override FP return register with %xmm0 for local functions when
7144 SSE math is enabled or for functions with sseregparm attribute. */
7145 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7147 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7148 if ((sse_level >= 1 && mode == SFmode)
7149 || (sse_level == 2 && mode == DFmode))
7150 regno = FIRST_SSE_REG;
7153 /* OImode shouldn't be used directly. */
7154 gcc_assert (mode != OImode);
7156 return gen_rtx_REG (orig_mode, regno);
7160 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7165 /* Handle libcalls, which don't provide a type node. */
7166 if (valtype == NULL)
7180 regno = FIRST_SSE_REG;
7184 regno = FIRST_FLOAT_REG;
7192 return gen_rtx_REG (mode, regno);
7194 else if (POINTER_TYPE_P (valtype))
7196 /* Pointers are always returned in Pmode. */
7200 ret = construct_container (mode, orig_mode, valtype, 1,
7201 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7202 x86_64_int_return_registers, 0);
7204 /* For zero sized structures, construct_container returns NULL, but we
7205 need to keep the rest of the compiler happy by returning a meaningful value. */
7207 ret = gen_rtx_REG (orig_mode, AX_REG);
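/* Example (illustrative): a function returning

       struct s { long a, b; };

   gets a two-entry PARALLEL over %rax and %rdx from
   construct_container above, while a zero-sized struct falls back to
   a bare %rax as just handled.  */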
7213 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7215 unsigned int regno = AX_REG;
7219 switch (GET_MODE_SIZE (mode))
7222 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7223 && !COMPLEX_MODE_P (mode))
7224 regno = FIRST_SSE_REG;
7228 if (mode == SFmode || mode == DFmode)
7229 regno = FIRST_SSE_REG;
7235 return gen_rtx_REG (orig_mode, regno);
7239 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7240 enum machine_mode orig_mode, enum machine_mode mode)
7242 const_tree fn, fntype;
7245 if (fntype_or_decl && DECL_P (fntype_or_decl))
7246 fn = fntype_or_decl;
7247 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7249 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7250 return function_value_ms_64 (orig_mode, mode);
7251 else if (TARGET_64BIT)
7252 return function_value_64 (orig_mode, mode, valtype);
7254 return function_value_32 (orig_mode, mode, fntype, fn);
7258 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7259 bool outgoing ATTRIBUTE_UNUSED)
7261 enum machine_mode mode, orig_mode;
7263 orig_mode = TYPE_MODE (valtype);
7264 mode = type_natural_mode (valtype, NULL);
7265 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7268 /* Pointer function arguments and return values are promoted to Pmode. */
7270 static enum machine_mode
7271 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7272 int *punsignedp, const_tree fntype,
7275 if (type != NULL_TREE && POINTER_TYPE_P (type))
7277 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7280 return default_promote_function_mode (type, mode, punsignedp, fntype,
7285 ix86_libcall_value (enum machine_mode mode)
7287 return ix86_function_value_1 (NULL, NULL, mode, mode);
7290 /* Return true iff type is returned in memory. */
7292 static bool ATTRIBUTE_UNUSED
7293 return_in_memory_32 (const_tree type, enum machine_mode mode)
7297 if (mode == BLKmode)
7300 size = int_size_in_bytes (type);
7302 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7305 if (VECTOR_MODE_P (mode) || mode == TImode)
7307 /* User-created vectors small enough to fit in EAX. */
7311 /* MMX/3dNow values are returned in MM0,
7312 except when it doesn't exist or the ABI prescribes otherwise. */
7314 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7316 /* SSE values are returned in XMM0, except when it doesn't exist. */
7320 /* AVX values are returned in YMM0, except when it doesn't exist. */
7331 /* OImode shouldn't be used directly. */
7332 gcc_assert (mode != OImode);
7337 static bool ATTRIBUTE_UNUSED
7338 return_in_memory_64 (const_tree type, enum machine_mode mode)
7340 int needed_intregs, needed_sseregs;
7341 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7344 static bool ATTRIBUTE_UNUSED
7345 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7347 HOST_WIDE_INT size = int_size_in_bytes (type);
7349 /* __m128 is returned in xmm0. */
7350 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7351 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7354 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
7355 return size != 1 && size != 2 && size != 4 && size != 8;
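/* Example (illustrative): under the MS ABI a 12-byte struct is
   returned via a hidden pointer (12 is not 1, 2, 4, or 8), while an
   8-byte struct comes back in %rax and a __m128 in %xmm0.  */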
7359 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7361 #ifdef SUBTARGET_RETURN_IN_MEMORY
7362 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7364 const enum machine_mode mode = type_natural_mode (type, NULL);
7368 if (ix86_function_type_abi (fntype) == MS_ABI)
7369 return return_in_memory_ms_64 (type, mode);
7371 return return_in_memory_64 (type, mode);
7374 return return_in_memory_32 (type, mode);
7378 /* When returning SSE vector types, we have a choice of either
7379 (1) being abi incompatible with a -march switch, or
7380 (2) generating an error.
7381 Given no good solution, I think the safest thing is one warning.
7382 The user won't be able to use -Werror, but....
7384 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7385 called in response to actually generating a caller or callee that
7386 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7387 via aggregate_value_p for general type probing from tree-ssa. */
7390 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7392 static bool warnedsse, warnedmmx;
7394 if (!TARGET_64BIT && type)
7396 /* Look at the return type of the function, not the function type. */
7397 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7399 if (!TARGET_SSE && !warnedsse)
7402 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7405 warning (0, "SSE vector return without SSE enabled "
7410 if (!TARGET_MMX && !warnedmmx)
7412 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7415 warning (0, "MMX vector return without MMX enabled "
7425 /* Create the va_list data type. */
7427 /* Returns the calling convention specific va_list data type.
7428 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7431 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7433 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7435 /* For i386 we use plain pointer to argument area. */
7436 if (!TARGET_64BIT || abi == MS_ABI)
7437 return build_pointer_type (char_type_node);
7439 record = lang_hooks.types.make_type (RECORD_TYPE);
7440 type_decl = build_decl (BUILTINS_LOCATION,
7441 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7443 f_gpr = build_decl (BUILTINS_LOCATION,
7444 FIELD_DECL, get_identifier ("gp_offset"),
7445 unsigned_type_node);
7446 f_fpr = build_decl (BUILTINS_LOCATION,
7447 FIELD_DECL, get_identifier ("fp_offset"),
7448 unsigned_type_node);
7449 f_ovf = build_decl (BUILTINS_LOCATION,
7450 FIELD_DECL, get_identifier ("overflow_arg_area"),
7452 f_sav = build_decl (BUILTINS_LOCATION,
7453 FIELD_DECL, get_identifier ("reg_save_area"),
7456 va_list_gpr_counter_field = f_gpr;
7457 va_list_fpr_counter_field = f_fpr;
7459 DECL_FIELD_CONTEXT (f_gpr) = record;
7460 DECL_FIELD_CONTEXT (f_fpr) = record;
7461 DECL_FIELD_CONTEXT (f_ovf) = record;
7462 DECL_FIELD_CONTEXT (f_sav) = record;
7464 TYPE_STUB_DECL (record) = type_decl;
7465 TYPE_NAME (record) = type_decl;
7466 TYPE_FIELDS (record) = f_gpr;
7467 DECL_CHAIN (f_gpr) = f_fpr;
7468 DECL_CHAIN (f_fpr) = f_ovf;
7469 DECL_CHAIN (f_ovf) = f_sav;
7471 layout_type (record);
7473 /* The correct type is an array type of one element. */
7474 return build_array_type (record, build_index_type (size_zero_node));
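/* For reference (illustrative C rendering of the tree built above,
   matching the psABI layout):

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];  */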
7477 /* Setup the builtin va_list data type and for 64-bit the additional
7478 calling convention specific va_list data types. */
7481 ix86_build_builtin_va_list (void)
7483 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7485 /* Initialize abi specific va_list builtin types. */
7489 if (ix86_abi == MS_ABI)
7491 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7492 if (TREE_CODE (t) != RECORD_TYPE)
7493 t = build_variant_type_copy (t);
7494 sysv_va_list_type_node = t;
7499 if (TREE_CODE (t) != RECORD_TYPE)
7500 t = build_variant_type_copy (t);
7501 sysv_va_list_type_node = t;
7503 if (ix86_abi != MS_ABI)
7505 t = ix86_build_builtin_va_list_abi (MS_ABI);
7506 if (TREE_CODE (t) != RECORD_TYPE)
7507 t = build_variant_type_copy (t);
7508 ms_va_list_type_node = t;
7513 if (TREE_CODE (t) != RECORD_TYPE)
7514 t = build_variant_type_copy (t);
7515 ms_va_list_type_node = t;
7522 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7525 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7531 /* GPR size of varargs save area. */
7532 if (cfun->va_list_gpr_size)
7533 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7535 ix86_varargs_gpr_size = 0;
7537 /* FPR size of varargs save area. We don't need it if we don't pass
7538 anything in SSE registers. */
7539 if (TARGET_SSE && cfun->va_list_fpr_size)
7540 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7542 ix86_varargs_fpr_size = 0;
7544 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7547 save_area = frame_pointer_rtx;
7548 set = get_varargs_alias_set ();
7550 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7551 if (max > X86_64_REGPARM_MAX)
7552 max = X86_64_REGPARM_MAX;
7554 for (i = cum->regno; i < max; i++)
7556 mem = gen_rtx_MEM (Pmode,
7557 plus_constant (save_area, i * UNITS_PER_WORD));
7558 MEM_NOTRAP_P (mem) = 1;
7559 set_mem_alias_set (mem, set);
7560 emit_move_insn (mem, gen_rtx_REG (Pmode,
7561 x86_64_int_parameter_registers[i]));
7564 if (ix86_varargs_fpr_size)
7566 enum machine_mode smode;
7569 /* Now emit code to save SSE registers.  The AX parameter contains the number
7570 of SSE parameter registers used to call this function, though all we
7571 actually check here is the zero/non-zero status. */
7573 label = gen_label_rtx ();
7574 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7575 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7578 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7579 we used movdqa (i.e. TImode) instead? Perhaps even better would
7580 be if we could determine the real mode of the data, via a hook
7581 into pass_stdarg. Ignore all that for now. */
7583 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7584 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7586 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7587 if (max > X86_64_SSE_REGPARM_MAX)
7588 max = X86_64_SSE_REGPARM_MAX;
7590 for (i = cum->sse_regno; i < max; ++i)
7592 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7593 mem = gen_rtx_MEM (smode, mem);
7594 MEM_NOTRAP_P (mem) = 1;
7595 set_mem_alias_set (mem, set);
7596 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7598 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7606 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7608 alias_set_type set = get_varargs_alias_set ();
7611 /* Reset to zero, as there might be a sysv va_arg used before. */
7613 ix86_varargs_gpr_size = 0;
7614 ix86_varargs_fpr_size = 0;
7616 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7620 mem = gen_rtx_MEM (Pmode,
7621 plus_constant (virtual_incoming_args_rtx,
7622 i * UNITS_PER_WORD));
7623 MEM_NOTRAP_P (mem) = 1;
7624 set_mem_alias_set (mem, set);
7626 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7627 emit_move_insn (mem, reg);
7632 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7633 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7636 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7637 CUMULATIVE_ARGS next_cum;
7640 /* This argument doesn't appear to be used anymore, which is good,
7641 because the old code here didn't suppress rtl generation. */
7642 gcc_assert (!no_rtl);
7647 fntype = TREE_TYPE (current_function_decl);
7649 /* For varargs, we do not want to skip the dummy va_dcl argument.
7650 For stdargs, we do want to skip the last named argument. */
7652 if (stdarg_p (fntype))
7653 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7656 if (cum->call_abi == MS_ABI)
7657 setup_incoming_varargs_ms_64 (&next_cum);
7659 setup_incoming_varargs_64 (&next_cum);
7662 /* Checks whether TYPE is a va_list of kind char *. */
7665 is_va_list_char_pointer (tree type)
7669 /* For 32-bit it is always true. */
7672 canonic = ix86_canonical_va_list_type (type);
7673 return (canonic == ms_va_list_type_node
7674 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7677 /* Implement va_start. */
7680 ix86_va_start (tree valist, rtx nextarg)
7682 HOST_WIDE_INT words, n_gpr, n_fpr;
7683 tree f_gpr, f_fpr, f_ovf, f_sav;
7684 tree gpr, fpr, ovf, sav, t;
7688 if (flag_split_stack
7689 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7691 unsigned int scratch_regno;
7693 /* When we are splitting the stack, we can't refer to the stack
7694 arguments using internal_arg_pointer, because they may be on
7695 the old stack. The split stack prologue will arrange to
7696 leave a pointer to the old stack arguments in a scratch
7697 register, which we here copy to a pseudo-register. The split
7698 stack prologue can't set the pseudo-register directly because
7699 it (the prologue) runs before any registers have been saved. */
7701 scratch_regno = split_stack_prologue_scratch_regno ();
7702 if (scratch_regno != INVALID_REGNUM)
7706 reg = gen_reg_rtx (Pmode);
7707 cfun->machine->split_stack_varargs_pointer = reg;
7710 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7714 push_topmost_sequence ();
7715 emit_insn_after (seq, entry_of_function ());
7716 pop_topmost_sequence ();
7720 /* Only 64bit target needs something special. */
7721 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7723 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7724 std_expand_builtin_va_start (valist, nextarg);
7729 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7730 next = expand_binop (ptr_mode, add_optab,
7731 cfun->machine->split_stack_varargs_pointer,
7732 crtl->args.arg_offset_rtx,
7733 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7734 convert_move (va_r, next, 0);
7739 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7740 f_fpr = DECL_CHAIN (f_gpr);
7741 f_ovf = DECL_CHAIN (f_fpr);
7742 f_sav = DECL_CHAIN (f_ovf);
7744 valist = build_simple_mem_ref (valist);
7745 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7746 /* The following should be folded into the MEM_REF offset. */
7747 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7749 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7751 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7753 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7756 /* Count number of gp and fp argument registers used. */
7757 words = crtl->args.info.words;
7758 n_gpr = crtl->args.info.regno;
7759 n_fpr = crtl->args.info.sse_regno;
7761 if (cfun->va_list_gpr_size)
7763 type = TREE_TYPE (gpr);
7764 t = build2 (MODIFY_EXPR, type,
7765 gpr, build_int_cst (type, n_gpr * 8));
7766 TREE_SIDE_EFFECTS (t) = 1;
7767 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7770 if (TARGET_SSE && cfun->va_list_fpr_size)
7772 type = TREE_TYPE (fpr);
7773 t = build2 (MODIFY_EXPR, type, fpr,
7774 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7775 TREE_SIDE_EFFECTS (t) = 1;
7776 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7779 /* Find the overflow area. */
7780 type = TREE_TYPE (ovf);
7781 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7782 ovf_rtx = crtl->args.internal_arg_pointer;
7784 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7785 t = make_tree (type, ovf_rtx);
7787 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7788 t = build2 (MODIFY_EXPR, type, ovf, t);
7789 TREE_SIDE_EFFECTS (t) = 1;
7790 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7792 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7794 /* Find the register save area.
7795 The function prologue saves it right above the stack frame. */
7796 type = TREE_TYPE (sav);
7797 t = make_tree (type, frame_pointer_rtx);
7798 if (!ix86_varargs_gpr_size)
7799 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7800 t = build2 (MODIFY_EXPR, type, sav, t);
7801 TREE_SIDE_EFFECTS (t) = 1;
7802 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
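/* Example (illustrative): for

       void f (int a, double d, ...)

   the code above initializes gp_offset to 8 (one GPR consumed),
   fp_offset to 8 * X86_64_REGPARM_MAX + 16 (one SSE register
   consumed), and points reg_save_area at the block spilled by the
   varargs prologue.  */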
7806 /* Implement va_arg. */
7809 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7812 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7813 tree f_gpr, f_fpr, f_ovf, f_sav;
7814 tree gpr, fpr, ovf, sav, t;
7816 tree lab_false, lab_over = NULL_TREE;
7821 enum machine_mode nat_mode;
7822 unsigned int arg_boundary;
7824 /* Only 64bit target needs something special. */
7825 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7826 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7828 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7829 f_fpr = DECL_CHAIN (f_gpr);
7830 f_ovf = DECL_CHAIN (f_fpr);
7831 f_sav = DECL_CHAIN (f_ovf);
7833 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7834 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7835 valist = build_va_arg_indirect_ref (valist);
7836 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7837 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7838 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7840 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7842 type = build_pointer_type (type);
7843 size = int_size_in_bytes (type);
7844 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7846 nat_mode = type_natural_mode (type, NULL);
7855 /* Unnamed 256bit vector mode parameters are passed on stack. */
7856 if (!TARGET_64BIT_MS_ABI)
7863 container = construct_container (nat_mode, TYPE_MODE (type),
7864 type, 0, X86_64_REGPARM_MAX,
7865 X86_64_SSE_REGPARM_MAX, intreg,
7870 /* Pull the value out of the saved registers. */
7872 addr = create_tmp_var (ptr_type_node, "addr");
7876 int needed_intregs, needed_sseregs;
7878 tree int_addr, sse_addr;
7880 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7881 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7883 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7885 need_temp = (!REG_P (container)
7886 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7887 || TYPE_ALIGN (type) > 128));
7889 /* In case we are passing a structure, verify that it is a consecutive block
7890 in the register save area.  If not, we need to do moves. */
7891 if (!need_temp && !REG_P (container))
7893 /* Verify that all registers are strictly consecutive */
7894 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7898 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7900 rtx slot = XVECEXP (container, 0, i);
7901 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7902 || INTVAL (XEXP (slot, 1)) != i * 16)
7910 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7912 rtx slot = XVECEXP (container, 0, i);
7913 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7914 || INTVAL (XEXP (slot, 1)) != i * 8)
7926 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7927 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7930 /* First ensure that we fit completely in registers. */
7933 t = build_int_cst (TREE_TYPE (gpr),
7934 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7935 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7936 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7937 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7938 gimplify_and_add (t, pre_p);
7942 t = build_int_cst (TREE_TYPE (fpr),
7943 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7944 + X86_64_REGPARM_MAX * 8);
7945 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7946 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7947 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7948 gimplify_and_add (t, pre_p);
7951 /* Compute index to start of area used for integer regs. */
7954 /* int_addr = gpr + sav; */
7955 t = fold_build_pointer_plus (sav, gpr);
7956 gimplify_assign (int_addr, t, pre_p);
7960 /* sse_addr = fpr + sav; */
7961 t = fold_build_pointer_plus (sav, fpr);
7962 gimplify_assign (sse_addr, t, pre_p);
7966 int i, prev_size = 0;
7967 tree temp = create_tmp_var (type, "va_arg_tmp");
7970 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7971 gimplify_assign (addr, t, pre_p);
7973 for (i = 0; i < XVECLEN (container, 0); i++)
7975 rtx slot = XVECEXP (container, 0, i);
7976 rtx reg = XEXP (slot, 0);
7977 enum machine_mode mode = GET_MODE (reg);
7983 tree dest_addr, dest;
7984 int cur_size = GET_MODE_SIZE (mode);
7986 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7987 prev_size = INTVAL (XEXP (slot, 1));
7988 if (prev_size + cur_size > size)
7990 cur_size = size - prev_size;
7991 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7992 if (mode == BLKmode)
7995 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7996 if (mode == GET_MODE (reg))
7997 addr_type = build_pointer_type (piece_type);
7999 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8001 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8004 if (SSE_REGNO_P (REGNO (reg)))
8006 src_addr = sse_addr;
8007 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8011 src_addr = int_addr;
8012 src_offset = REGNO (reg) * 8;
8014 src_addr = fold_convert (addr_type, src_addr);
8015 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8017 dest_addr = fold_convert (daddr_type, addr);
8018 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8019 if (cur_size == GET_MODE_SIZE (mode))
8021 src = build_va_arg_indirect_ref (src_addr);
8022 dest = build_va_arg_indirect_ref (dest_addr);
8024 gimplify_assign (dest, src, pre_p);
8029 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8030 3, dest_addr, src_addr,
8031 size_int (cur_size));
8032 gimplify_and_add (copy, pre_p);
8034 prev_size += cur_size;
8040 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8041 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8042 gimplify_assign (gpr, t, pre_p);
8047 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8048 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8049 gimplify_assign (fpr, t, pre_p);
8052 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8054 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8057 /* ... otherwise out of the overflow area. */
8059 /* When we align a parameter on the stack for the caller, if its
8060 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8061 aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
8062 with the caller here. */
8063 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8064 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8065 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8067 /* Care for on-stack alignment if needed. */
8068 if (arg_boundary <= 64 || size == 0)
8072 HOST_WIDE_INT align = arg_boundary / 8;
8073 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8074 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8075 build_int_cst (TREE_TYPE (t), -align));
8078 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8079 gimplify_assign (addr, t, pre_p);
8081 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8082 gimplify_assign (unshare_expr (ovf), t, pre_p);
8085 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8087 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8088 addr = fold_convert (ptrtype, addr);
8091 addr = build_va_arg_indirect_ref (addr);
8092 return build_va_arg_indirect_ref (addr);
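/* Worked example (illustrative, not from the original sources): with
   X86_64_REGPARM_MAX == 6 and needed_intregs == 2, the earlier check
   branches to lab_false once gpr >= (6 - 2 + 1) * 8 == 40, i.e. when
   fewer than two of the six 8-byte GPR save slots remain.  An argument
   that passes the check is fetched from int_addr == sav + gpr; SSE
   arguments live past the 6 * 8 == 48 bytes of GPR slots, in 16-byte
   slots addressed through sse_addr == sav + fpr.  */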
8095 /* Return true if OPNUM's MEM should be matched
8096 in movabs* patterns. */
8099 ix86_check_movabs (rtx insn, int opnum)
8103 set = PATTERN (insn);
8104 if (GET_CODE (set) == PARALLEL)
8105 set = XVECEXP (set, 0, 0);
8106 gcc_assert (GET_CODE (set) == SET);
8107 mem = XEXP (set, opnum);
8108 while (GET_CODE (mem) == SUBREG)
8109 mem = SUBREG_REG (mem);
8110 gcc_assert (MEM_P (mem));
8111 return volatile_ok || !MEM_VOLATILE_P (mem);
8114 /* Initialize the table of extra 80387 mathematical constants. */
8117 init_ext_80387_constants (void)
8119 static const char * cst[5] =
8121 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8122 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8123 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8124 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8125 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8129 for (i = 0; i < 5; i++)
8131 real_from_string (&ext_80387_constants_table[i], cst[i]);
8132 /* Ensure each constant is rounded to XFmode precision. */
8133 real_convert (&ext_80387_constants_table[i],
8134 XFmode, &ext_80387_constants_table[i]);
8137 ext_80387_constants_init = 1;
8140 /* Return non-zero if the constant is something that
8141 can be loaded with a special instruction. */
8144 standard_80387_constant_p (rtx x)
8146 enum machine_mode mode = GET_MODE (x);
8150 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8153 if (x == CONST0_RTX (mode))
8155 if (x == CONST1_RTX (mode))
8158 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8160 /* For XFmode constants, try to find a special 80387 instruction when
8161 optimizing for size or on those CPUs that benefit from them. */
8163 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8167 if (! ext_80387_constants_init)
8168 init_ext_80387_constants ();
8170 for (i = 0; i < 5; i++)
8171 if (real_identical (&r, &ext_80387_constants_table[i]))
8175 /* A load of the constant -0.0 or -1.0 will be split into an
8176 fldz;fchs or fld1;fchs sequence. */
8177 if (real_isnegzero (&r))
8179 if (real_identical (&r, &dconstm1))
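/* A sketch of the resulting mapping (assuming the conventional return
   values of this function): 1 for 0.0 (fldz), 2 for 1.0 (fld1), 3..7
   for the table entries above (fldlg2, fldln2, fldl2e, fldl2t, fldpi),
   and two further codes for -0.0 and -1.0, which are emitted as the
   fldz;fchs and fld1;fchs sequences noted above.  */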
8185 /* Return the opcode of the special instruction to be used to load
8189 standard_80387_constant_opcode (rtx x)
8191 switch (standard_80387_constant_p (x))
8215 /* Return the CONST_DOUBLE representing the 80387 constant that is
8216 loaded by the specified special instruction. The argument IDX
8217 matches the return value from standard_80387_constant_p. */
8220 standard_80387_constant_rtx (int idx)
8224 if (! ext_80387_constants_init)
8225 init_ext_80387_constants ();
8241 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8245 /* Return 1 if X is all 0s and 2 if X is all 1s
8246 in a supported SSE/AVX vector mode. */
8249 standard_sse_constant_p (rtx x)
8251 enum machine_mode mode = GET_MODE (x);
8253 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8255 if (vector_all_ones_operand (x, mode))
8277 /* Return the opcode of the special instruction to be used to load
8281 standard_sse_constant_opcode (rtx insn, rtx x)
8283 switch (standard_sse_constant_p (x))
8286 switch (get_attr_mode (insn))
8289 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8290 return "%vpxor\t%0, %d0";
8292 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8293 return "%vxorpd\t%0, %d0";
8295 return "%vxorps\t%0, %d0";
8298 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8299 return "vpxor\t%x0, %x0, %x0";
8301 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8302 return "vxorpd\t%x0, %x0, %x0";
8304 return "vxorps\t%x0, %x0, %x0";
8312 return "vpcmpeqd\t%0, %0, %0";
8314 return "pcmpeqd\t%0, %0";
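/* Example (illustrative): an all-zeros V4SF constant is materialized
   as "xorps %xmm0, %xmm0" (or the AVX "vxorps" form above), and an
   all-ones vector as "pcmpeqd %xmm0, %xmm0"; both are shorter and
   cheaper than loading the constant from the constant pool.  */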
8322 /* Returns true if OP contains a symbol reference. */
8325 symbolic_reference_mentioned_p (rtx op)
8330 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8333 fmt = GET_RTX_FORMAT (GET_CODE (op));
8334 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8340 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8341 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8345 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8352 /* Return true if it is appropriate to emit `ret' instructions in the
8353 body of a function. Do this only if the epilogue is simple, needing a
8354 couple of insns. Prior to reloading, we can't tell how many registers
8355 must be saved, so return false then. Return false if there is no frame
8356 marker to de-allocate. */
8359 ix86_can_use_return_insn_p (void)
8361 struct ix86_frame frame;
8363 if (! reload_completed || frame_pointer_needed)
8366 /* Don't allow more than 32k pop, since that's all we can do
8367 with one instruction. */
8368 if (crtl->args.pops_args && crtl->args.size >= 32768)
8371 ix86_compute_frame_layout (&frame);
8372 return (frame.stack_pointer_offset == UNITS_PER_WORD
8373 && (frame.nregs + frame.nsseregs) == 0);
8376 /* Value should be nonzero if functions must have frame pointers.
8377 Zero means the frame pointer need not be set up (and parms may
8378 be accessed via the stack pointer) in functions that seem suitable. */
8381 ix86_frame_pointer_required (void)
8383 /* If we accessed previous frames, then the generated code expects
8384 to be able to access the saved ebp value in our frame. */
8385 if (cfun->machine->accesses_prev_frame)
8388 /* Several x86 OSes need a frame pointer for other reasons,
8389 usually pertaining to setjmp. */
8390 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8393 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8394 turns off the frame pointer by default. Turn it back on now if
8395 we've not got a leaf function. */
8396 if (TARGET_OMIT_LEAF_FRAME_POINTER
8397 && (!current_function_is_leaf
8398 || ix86_current_function_calls_tls_descriptor))
8401 if (crtl->profile && !flag_fentry)
8407 /* Record that the current function accesses previous call frames. */
8410 ix86_setup_frame_addresses (void)
8412 cfun->machine->accesses_prev_frame = 1;
8415 #ifndef USE_HIDDEN_LINKONCE
8416 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8417 # define USE_HIDDEN_LINKONCE 1
8419 # define USE_HIDDEN_LINKONCE 0
8423 static int pic_labels_used;
8425 /* Fills in the label name that should be used for a pc thunk for
8426 the given register. */
8429 get_pc_thunk_name (char name[32], unsigned int regno)
8431 gcc_assert (!TARGET_64BIT);
8433 if (USE_HIDDEN_LINKONCE)
8434 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
8436 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
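/* Example (illustrative): for the EBX PIC register this yields the
   well-known name "__x86.get_pc_thunk.bx" when hidden linkonce
   sections are usable, and otherwise an internal label built from
   the "LPR" prefix and the register number.  */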
8440 /* This function generates code for -fpic that loads %ebx with
8441 the return address of the caller and then returns. */
8444 ix86_code_end (void)
8449 for (regno = AX_REG; regno <= SP_REG; regno++)
8454 if (!(pic_labels_used & (1 << regno)))
8457 get_pc_thunk_name (name, regno);
8459 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8460 get_identifier (name),
8461 build_function_type_list (void_type_node, NULL_TREE));
8462 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8463 NULL_TREE, void_type_node);
8464 TREE_PUBLIC (decl) = 1;
8465 TREE_STATIC (decl) = 1;
8470 switch_to_section (darwin_sections[text_coal_section]);
8471 fputs ("\t.weak_definition\t", asm_out_file);
8472 assemble_name (asm_out_file, name);
8473 fputs ("\n\t.private_extern\t", asm_out_file);
8474 assemble_name (asm_out_file, name);
8475 putc ('\n', asm_out_file);
8476 ASM_OUTPUT_LABEL (asm_out_file, name);
8477 DECL_WEAK (decl) = 1;
8481 if (USE_HIDDEN_LINKONCE)
8483 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8485 targetm.asm_out.unique_section (decl, 0);
8486 switch_to_section (get_named_section (decl, NULL, 0));
8488 targetm.asm_out.globalize_label (asm_out_file, name);
8489 fputs ("\t.hidden\t", asm_out_file);
8490 assemble_name (asm_out_file, name);
8491 putc ('\n', asm_out_file);
8492 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8496 switch_to_section (text_section);
8497 ASM_OUTPUT_LABEL (asm_out_file, name);
8500 DECL_INITIAL (decl) = make_node (BLOCK);
8501 current_function_decl = decl;
8502 init_function_start (decl);
8503 first_function_block_is_cold = false;
8504 /* Make sure unwind info is emitted for the thunk if needed. */
8505 final_start_function (emit_barrier (), asm_out_file, 1);
8507 /* Pad stack IP move with 4 instructions (two NOPs count
8508 as one instruction). */
8509 if (TARGET_PAD_SHORT_FUNCTION)
8514 fputs ("\tnop\n", asm_out_file);
8517 xops[0] = gen_rtx_REG (Pmode, regno);
8518 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8519 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8520 fputs ("\tret\n", asm_out_file);
8521 final_end_function ();
8522 init_insn_lengths ();
8523 free_after_compilation (cfun);
8525 current_function_decl = NULL;
8528 if (flag_split_stack)
8529 file_end_indicate_split_stack ();
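/* The body of each emitted thunk is tiny; for %ebx it is roughly the
   following (a sketch, modulo assembler syntax and the sectioning
   directives above):

       __x86.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. it copies the return address (the caller's PC) into the
   destination register and returns.  */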
8532 /* Emit code for the SET_GOT patterns. */
8535 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8541 if (TARGET_VXWORKS_RTP && flag_pic)
8543 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8544 xops[2] = gen_rtx_MEM (Pmode,
8545 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8546 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8548 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8549 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8550 an unadorned address. */
8551 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8552 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8553 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8557 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8561 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8563 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8566 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8567 is what will be referenced by the Mach-O PIC subsystem. */
8569 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8572 targetm.asm_out.internal_label (asm_out_file, "L",
8573 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8578 get_pc_thunk_name (name, REGNO (dest));
8579 pic_labels_used |= 1 << REGNO (dest);
8581 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8582 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8583 output_asm_insn ("call\t%X2", xops);
8584 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8585 is what will be referenced by the Mach-O PIC subsystem. */
8588 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8590 targetm.asm_out.internal_label (asm_out_file, "L",
8591 CODE_LABEL_NUMBER (label));
8596 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
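/* On a typical ELF target the emitted sequence is roughly (a sketch;
   the thunk name depends on the chosen PIC register):

       call    __x86.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   which leaves the address of the GOT in the PIC register.  */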
8601 /* Generate a "push" pattern for input ARG. */
8606 struct machine_function *m = cfun->machine;
8608 if (m->fs.cfa_reg == stack_pointer_rtx)
8609 m->fs.cfa_offset += UNITS_PER_WORD;
8610 m->fs.sp_offset += UNITS_PER_WORD;
8612 return gen_rtx_SET (VOIDmode,
8614 gen_rtx_PRE_DEC (Pmode,
8615 stack_pointer_rtx)),
8619 /* Generate a "pop" pattern for input ARG. */
8624 return gen_rtx_SET (VOIDmode,
8627 gen_rtx_POST_INC (Pmode,
8628 stack_pointer_rtx)));
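/* The generated RTL is the usual push/pop idiom (shown as a sketch):
   a push is
       (set (mem (pre_dec (reg sp))) arg)
   and a pop is
       (set arg (mem (post_inc (reg sp))))
   with gen_push additionally updating the CFA and SP offsets tracked
   in cfun->machine->fs above.  */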
8631 /* Return >= 0 if there is an unused call-clobbered register available
8632 for the entire function. */
8635 ix86_select_alt_pic_regnum (void)
8637 if (current_function_is_leaf
8639 && !ix86_current_function_calls_tls_descriptor)
8642 /* Can't use the same register for both PIC and DRAP. */
8644 drap = REGNO (crtl->drap_reg);
8647 for (i = 2; i >= 0; --i)
8648 if (i != drap && !df_regs_ever_live_p (i))
8652 return INVALID_REGNUM;
8655 /* Return TRUE if we need to save REGNO. */
8658 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8660 if (pic_offset_table_rtx
8661 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8662 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8664 || crtl->calls_eh_return
8665 || crtl->uses_const_pool))
8666 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8668 if (crtl->calls_eh_return && maybe_eh_return)
8673 unsigned test = EH_RETURN_DATA_REGNO (i);
8674 if (test == INVALID_REGNUM)
8681 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8684 return (df_regs_ever_live_p (regno)
8685 && !call_used_regs[regno]
8686 && !fixed_regs[regno]
8687 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8690 /* Return number of saved general purpose registers. */
8693 ix86_nsaved_regs (void)
8698 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8699 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8704 /* Return number of saved SSE registers. */
8707 ix86_nsaved_sseregs (void)
8712 if (!TARGET_64BIT_MS_ABI)
8714 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8715 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8720 /* Given FROM and TO register numbers, say whether this elimination is
8721 allowed. If stack alignment is needed, we can only replace argument
8722 pointer with hard frame pointer, or replace frame pointer with stack
8723 pointer. Otherwise, frame pointer elimination is automatically
8724 handled and all other eliminations are valid. */
8727 ix86_can_eliminate (const int from, const int to)
8729 if (stack_realign_fp)
8730 return ((from == ARG_POINTER_REGNUM
8731 && to == HARD_FRAME_POINTER_REGNUM)
8732 || (from == FRAME_POINTER_REGNUM
8733 && to == STACK_POINTER_REGNUM));
8735 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8738 /* Return the offset between two registers, one to be eliminated, and the other
8739 its replacement, at the start of a routine. */
8742 ix86_initial_elimination_offset (int from, int to)
8744 struct ix86_frame frame;
8745 ix86_compute_frame_layout (&frame);
8747 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8748 return frame.hard_frame_pointer_offset;
8749 else if (from == FRAME_POINTER_REGNUM
8750 && to == HARD_FRAME_POINTER_REGNUM)
8751 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8754 gcc_assert (to == STACK_POINTER_REGNUM);
8756 if (from == ARG_POINTER_REGNUM)
8757 return frame.stack_pointer_offset;
8759 gcc_assert (from == FRAME_POINTER_REGNUM);
8760 return frame.stack_pointer_offset - frame.frame_pointer_offset;
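/* Worked example (illustrative numbers): for a frame with
   hard_frame_pointer_offset == 16, frame_pointer_offset == 32 and
   stack_pointer_offset == 80, eliminating the argument pointer to the
   hard frame pointer yields 16, to the stack pointer 80, and the
   frame pointer to the stack pointer 80 - 32 == 48.  */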
8764 /* In a dynamically-aligned function, we can't know the offset from
8765 stack pointer to frame pointer, so we must ensure that setjmp
8766 eliminates fp against the hard fp (%ebp) rather than trying to
8767 index from %esp up to the top of the frame across a gap that is
8768 of unknown (at compile-time) size. */
8770 ix86_builtin_setjmp_frame_value (void)
8772 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8775 /* When using -fsplit-stack, the allocation routines set a field in
8776 the TCB to the bottom of the stack plus this much space, measured
8777 in bytes. */
8779 #define SPLIT_STACK_AVAILABLE 256
8781 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
8784 ix86_compute_frame_layout (struct ix86_frame *frame)
8786 unsigned int stack_alignment_needed;
8787 HOST_WIDE_INT offset;
8788 unsigned int preferred_alignment;
8789 HOST_WIDE_INT size = get_frame_size ();
8790 HOST_WIDE_INT to_allocate;
8792 frame->nregs = ix86_nsaved_regs ();
8793 frame->nsseregs = ix86_nsaved_sseregs ();
8795 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8796 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8798 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
8799 except in function prologues and leaf functions. */
8800 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8801 && (!current_function_is_leaf || cfun->calls_alloca != 0
8802 || ix86_current_function_calls_tls_descriptor))
8804 preferred_alignment = 16;
8805 stack_alignment_needed = 16;
8806 crtl->preferred_stack_boundary = 128;
8807 crtl->stack_alignment_needed = 128;
8810 gcc_assert (!size || stack_alignment_needed);
8811 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8812 gcc_assert (preferred_alignment <= stack_alignment_needed);
8814 /* For SEH we have to limit the amount of code movement into the prologue.
8815 At present we do this via a BLOCKAGE, at which point there's very little
8816 scheduling that can be done, which means that there's very little point
8817 in doing anything except PUSHs. */
8819 cfun->machine->use_fast_prologue_epilogue = false;
8821 /* During reload iteration the number of registers saved can change.
8822 Recompute the value as needed. Do not recompute when the number of registers
8823 didn't change, as reload does multiple calls to the function and does not
8824 expect the decision to change within a single iteration. */
8825 else if (!optimize_function_for_size_p (cfun)
8826 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8828 int count = frame->nregs;
8829 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8831 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8833 /* The fast prologue uses move instead of push to save registers. This
8834 is significantly longer, but also executes faster as modern hardware
8835 can execute the moves in parallel, but can't do that for push/pop.
8837 Be careful about choosing what prologue to emit: when the function takes
8838 many instructions to execute, we may use the slow version, as well as when
8839 the function is known to be outside a hot spot (this is known with
8840 feedback only). Weight the size of the function by the number of registers
8841 to save, as it is cheap to use one or two push instructions but very
8842 slow to use many of them. */
8844 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8845 if (node->frequency < NODE_FREQUENCY_NORMAL
8846 || (flag_branch_probabilities
8847 && node->frequency < NODE_FREQUENCY_HOT))
8848 cfun->machine->use_fast_prologue_epilogue = false;
8850 cfun->machine->use_fast_prologue_epilogue
8851 = !expensive_function_p (count);
8854 frame->save_regs_using_mov
8855 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
8856 /* If static stack checking is enabled and done with probes,
8857 the registers need to be saved before allocating the frame. */
8858 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
8860 /* Skip return address. */
8861 offset = UNITS_PER_WORD;
8863 /* Skip pushed static chain. */
8864 if (ix86_static_chain_on_stack)
8865 offset += UNITS_PER_WORD;
8867 /* Skip saved base pointer. */
8868 if (frame_pointer_needed)
8869 offset += UNITS_PER_WORD;
8870 frame->hfp_save_offset = offset;
8872 /* The traditional frame pointer location is at the top of the frame. */
8873 frame->hard_frame_pointer_offset = offset;
8875 /* Register save area */
8876 offset += frame->nregs * UNITS_PER_WORD;
8877 frame->reg_save_offset = offset;
8879 /* Align and set SSE register save area. */
8880 if (frame->nsseregs)
8882 /* The only ABI that has saved SSE registers (Win64) also has a
8883 16-byte aligned default stack, and thus we don't need to be
8884 within the re-aligned local stack frame to save them. */
8885 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8886 offset = (offset + 16 - 1) & -16;
8887 offset += frame->nsseregs * 16;
8889 frame->sse_reg_save_offset = offset;
8891 /* The re-aligned stack starts here. Values before this point are not
8892 directly comparable with values below this point. In order to make
8893 sure that no value happens to be the same before and after, force
8894 the alignment computation below to add a non-zero value. */
8895 if (stack_realign_fp)
8896 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8899 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8900 offset += frame->va_arg_size;
8902 /* Align start of frame for local function. */
8903 if (stack_realign_fp
8904 || offset != frame->sse_reg_save_offset
8906 || !current_function_is_leaf
8907 || cfun->calls_alloca
8908 || ix86_current_function_calls_tls_descriptor)
8909 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8911 /* Frame pointer points here. */
8912 frame->frame_pointer_offset = offset;
8916 /* Add outgoing arguments area. Can be skipped if we eliminated
8917 all the function calls as dead code.
8918 Skipping is however impossible when the function calls alloca. The alloca
8919 expander assumes that the last crtl->outgoing_args_size bytes
8920 of the stack frame are unused. */
8921 if (ACCUMULATE_OUTGOING_ARGS
8922 && (!current_function_is_leaf || cfun->calls_alloca
8923 || ix86_current_function_calls_tls_descriptor))
8925 offset += crtl->outgoing_args_size;
8926 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8929 frame->outgoing_arguments_size = 0;
8931 /* Align stack boundary. Only needed if we're calling another function
8932 or using alloca. */
8933 if (!current_function_is_leaf || cfun->calls_alloca
8934 || ix86_current_function_calls_tls_descriptor)
8935 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8937 /* We've reached end of stack frame. */
8938 frame->stack_pointer_offset = offset;
8940 /* Size prologue needs to allocate. */
8941 to_allocate = offset - frame->sse_reg_save_offset;
8943 if ((!to_allocate && frame->nregs <= 1)
8944 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8945 frame->save_regs_using_mov = false;
8947 if (ix86_using_red_zone ()
8948 && current_function_sp_is_unchanging
8949 && current_function_is_leaf
8950 && !ix86_current_function_calls_tls_descriptor)
8952 frame->red_zone_size = to_allocate;
8953 if (frame->save_regs_using_mov)
8954 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8955 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8956 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8959 frame->red_zone_size = 0;
8960 frame->stack_pointer_offset -= frame->red_zone_size;
8962 /* The SEH frame pointer location is near the bottom of the frame.
8963 This is enforced by the fact that the difference between the
8964 stack pointer and the frame pointer is limited to 240 bytes in
8965 the unwind data structure. */
8970 /* If we can leave the frame pointer where it is, do so. */
8971 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
8972 if (diff > 240 || (diff & 15) != 0)
8974 /* Ideally we'd determine what portion of the local stack frame
8975 (within the constraint of the lowest 240) is most heavily used.
8976 But without that complication, simply bias the frame pointer
8977 by 128 bytes so as to maximize the amount of the local stack
8978 frame that is addressable with 8-bit offsets. */
8979 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
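/* Worked example (illustrative; 64-bit, frame pointer needed, two
   saved GPRs, no SSE saves, 40 bytes of locals, 16-byte alignment):
   OFFSET starts at 8 for the return address, the saved frame pointer
   brings it to hard_frame_pointer_offset == 16, the register save
   area brings it to reg_save_offset == 32, the local frame then
   starts at frame_pointer_offset == 32, and the 40 bytes of locals
   leave OFFSET == 72 before the outgoing-argument and preferred
   boundary alignments are applied.  */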
8984 /* This is semi-inlined memory_address_length, but simplified
8985 since we know that we're always dealing with reg+offset, and
8986 to avoid having to create and discard all that rtl. */
8989 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8995 /* EBP and R13 cannot be encoded without an offset. */
8996 len = (regno == BP_REG || regno == R13_REG);
8998 else if (IN_RANGE (offset, -128, 127))
9001 /* ESP and R12 must be encoded with a SIB byte. */
9002 if (regno == SP_REG || regno == R12_REG)
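/* Worked examples (illustrative): (%rax, 0) needs no displacement at
   all, so the length is 0; (%rbp, 0) still needs a disp8, giving 1;
   (%rsp, 50) needs a disp8 plus a SIB byte, giving 2; and
   (%rax, 1000) needs a full disp32, giving 4.  */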
9008 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9009 The valid base registers are taken from CFUN->MACHINE->FS. */
9012 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9014 const struct machine_function *m = cfun->machine;
9015 rtx base_reg = NULL;
9016 HOST_WIDE_INT base_offset = 0;
9018 if (m->use_fast_prologue_epilogue)
9020 /* Choose the base register most likely to allow the most scheduling
9021 opportunities. Generally FP is valid throughout the function,
9022 while DRAP must be reloaded within the epilogue. But choose either
9023 over the SP due to increased encoding size. */
9027 base_reg = hard_frame_pointer_rtx;
9028 base_offset = m->fs.fp_offset - cfa_offset;
9030 else if (m->fs.drap_valid)
9032 base_reg = crtl->drap_reg;
9033 base_offset = 0 - cfa_offset;
9035 else if (m->fs.sp_valid)
9037 base_reg = stack_pointer_rtx;
9038 base_offset = m->fs.sp_offset - cfa_offset;
9043 HOST_WIDE_INT toffset;
9046 /* Choose the base register with the smallest address encoding.
9047 With a tie, choose FP > DRAP > SP. */
9050 base_reg = stack_pointer_rtx;
9051 base_offset = m->fs.sp_offset - cfa_offset;
9052 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9054 if (m->fs.drap_valid)
9056 toffset = 0 - cfa_offset;
9057 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9060 base_reg = crtl->drap_reg;
9061 base_offset = toffset;
9067 toffset = m->fs.fp_offset - cfa_offset;
9068 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9071 base_reg = hard_frame_pointer_rtx;
9072 base_offset = toffset;
9077 gcc_assert (base_reg != NULL);
9079 return plus_constant (base_reg, base_offset);
9082 /* Emit code to save registers in the prologue. */
9085 ix86_emit_save_regs (void)
9090 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9091 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9093 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9094 RTX_FRAME_RELATED_P (insn) = 1;
9098 /* Emit a single register save at CFA - CFA_OFFSET. */
9101 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9102 HOST_WIDE_INT cfa_offset)
9104 struct machine_function *m = cfun->machine;
9105 rtx reg = gen_rtx_REG (mode, regno);
9106 rtx mem, addr, base, insn;
9108 addr = choose_baseaddr (cfa_offset);
9109 mem = gen_frame_mem (mode, addr);
9111 /* For SSE saves, we need to indicate the 128-bit alignment. */
9112 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9114 insn = emit_move_insn (mem, reg);
9115 RTX_FRAME_RELATED_P (insn) = 1;
9118 if (GET_CODE (base) == PLUS)
9119 base = XEXP (base, 0);
9120 gcc_checking_assert (REG_P (base));
9122 /* When saving registers into a re-aligned local stack frame, avoid
9123 any tricky guessing by dwarf2out. */
9124 if (m->fs.realigned)
9126 gcc_checking_assert (stack_realign_drap);
9128 if (regno == REGNO (crtl->drap_reg))
9130 /* A bit of a hack. We force the DRAP register to be saved in
9131 the re-aligned stack frame, which provides us with a copy
9132 of the CFA that will last past the prologue. Install it. */
9133 gcc_checking_assert (cfun->machine->fs.fp_valid);
9134 addr = plus_constant (hard_frame_pointer_rtx,
9135 cfun->machine->fs.fp_offset - cfa_offset);
9136 mem = gen_rtx_MEM (mode, addr);
9137 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9141 /* The frame pointer is a stable reference within the
9142 aligned frame. Use it. */
9143 gcc_checking_assert (cfun->machine->fs.fp_valid);
9144 addr = plus_constant (hard_frame_pointer_rtx,
9145 cfun->machine->fs.fp_offset - cfa_offset);
9146 mem = gen_rtx_MEM (mode, addr);
9147 add_reg_note (insn, REG_CFA_EXPRESSION,
9148 gen_rtx_SET (VOIDmode, mem, reg));
9152 /* The memory may not be relative to the current CFA register,
9153 which means that we may need to generate a new pattern for
9154 use by the unwind info. */
9155 else if (base != m->fs.cfa_reg)
9157 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9158 mem = gen_rtx_MEM (mode, addr);
9159 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9163 /* Emit code to save registers using MOV insns.
9164 First register is stored at CFA - CFA_OFFSET. */
9166 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9170 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9171 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9173 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9174 cfa_offset -= UNITS_PER_WORD;
9178 /* Emit code to save SSE registers using MOV insns.
9179 First register is stored at CFA - CFA_OFFSET. */
9181 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9185 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9186 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9188 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9193 static GTY(()) rtx queued_cfa_restores;
9195 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9196 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9197 Don't add the note if the previously saved value will be left untouched
9198 within stack red-zone till return, as unwinders can find the same value
9199 in the register and on the stack. */
9202 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9204 if (!crtl->shrink_wrapped
9205 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9210 add_reg_note (insn, REG_CFA_RESTORE, reg);
9211 RTX_FRAME_RELATED_P (insn) = 1;
9215 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9218 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9221 ix86_add_queued_cfa_restore_notes (rtx insn)
9224 if (!queued_cfa_restores)
9226 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9228 XEXP (last, 1) = REG_NOTES (insn);
9229 REG_NOTES (insn) = queued_cfa_restores;
9230 queued_cfa_restores = NULL_RTX;
9231 RTX_FRAME_RELATED_P (insn) = 1;
9234 /* Expand prologue or epilogue stack adjustment.
9235 The pattern exists to put a dependency on all ebp-based memory accesses.
9236 STYLE should be negative if instructions should be marked as frame related,
9237 zero if the %r11 register is live and cannot be freely used, and positive
9238 otherwise. */
9241 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9242 int style, bool set_cfa)
9244 struct machine_function *m = cfun->machine;
9246 bool add_frame_related_expr = false;
9249 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9250 else if (x86_64_immediate_operand (offset, DImode))
9251 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9255 /* r11 is used by indirect sibcall return as well, set before the
9256 epilogue and used after the epilogue. */
9258 tmp = gen_rtx_REG (DImode, R11_REG);
9261 gcc_assert (src != hard_frame_pointer_rtx
9262 && dest != hard_frame_pointer_rtx);
9263 tmp = hard_frame_pointer_rtx;
9265 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9267 add_frame_related_expr = true;
9269 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9272 insn = emit_insn (insn);
9274 ix86_add_queued_cfa_restore_notes (insn);
9280 gcc_assert (m->fs.cfa_reg == src);
9281 m->fs.cfa_offset += INTVAL (offset);
9282 m->fs.cfa_reg = dest;
9284 r = gen_rtx_PLUS (Pmode, src, offset);
9285 r = gen_rtx_SET (VOIDmode, dest, r);
9286 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9287 RTX_FRAME_RELATED_P (insn) = 1;
9291 RTX_FRAME_RELATED_P (insn) = 1;
9292 if (add_frame_related_expr)
9294 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9295 r = gen_rtx_SET (VOIDmode, dest, r);
9296 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9300 if (dest == stack_pointer_rtx)
9302 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9303 bool valid = m->fs.sp_valid;
9305 if (src == hard_frame_pointer_rtx)
9307 valid = m->fs.fp_valid;
9308 ooffset = m->fs.fp_offset;
9310 else if (src == crtl->drap_reg)
9312 valid = m->fs.drap_valid;
9317 /* Else there are two possibilities: SP itself, which we set
9318 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9319 taken care of by hand along the eh_return path. */
9320 gcc_checking_assert (src == stack_pointer_rtx
9321 || offset == const0_rtx);
9324 m->fs.sp_offset = ooffset - INTVAL (offset);
9325 m->fs.sp_valid = valid;
9329 /* Find an available register to be used as dynamic realign argument
9330 pointer register. Such a register will be written in the prologue and
9331 used at the beginning of the body, so it must not be
9332 1. a parameter passing register.
9334 We reuse the static-chain register if it is available. Otherwise, we
9335 use DI for i386 and R13 for x86-64. We chose R13 since it has a
9336 longer life time.
9338 Return: the regno of the chosen register. */
9341 find_drap_reg (void)
9343 tree decl = cfun->decl;
9347 /* Use R13 for a nested function or a function that needs a static chain.
9348 Since a function with a tail call may use any caller-saved
9349 register in the epilogue, DRAP must not use a caller-saved
9350 register in that case. */
9351 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9358 /* Use DI for a nested function or a function that needs a static chain.
9359 Since a function with a tail call may use any caller-saved
9360 register in the epilogue, DRAP must not use a caller-saved
9361 register in that case. */
9362 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9365 /* Reuse the static chain register if it isn't used for parameter
9366 passing. */
9367 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9369 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9370 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9377 /* Return minimum incoming stack alignment. */
9380 ix86_minimum_incoming_stack_boundary (bool sibcall)
9382 unsigned int incoming_stack_boundary;
9384 /* Prefer the one specified at command line. */
9385 if (ix86_user_incoming_stack_boundary)
9386 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9387 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9388 if -mstackrealign is used, this isn't a sibcall check, and the
9389 estimated stack alignment is 128 bits. */
9392 && ix86_force_align_arg_pointer
9393 && crtl->stack_alignment_estimated == 128)
9394 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9396 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9398 /* Incoming stack alignment can be changed on individual functions
9399 via force_align_arg_pointer attribute. We use the smallest
9400 incoming stack boundary. */
9401 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9402 && lookup_attribute (ix86_force_align_arg_pointer_string,
9403 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9404 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9406 /* The incoming stack frame has to be aligned at least at
9407 parm_stack_boundary. */
9408 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9409 incoming_stack_boundary = crtl->parm_stack_boundary;
9411 /* The stack at the entry of main is aligned by the runtime. We use the
9412 smallest incoming stack boundary. */
9413 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9414 && DECL_NAME (current_function_decl)
9415 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9416 && DECL_FILE_SCOPE_P (current_function_decl))
9417 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9419 return incoming_stack_boundary;
9422 /* Update incoming stack boundary and estimated stack alignment. */
9425 ix86_update_stack_boundary (void)
9427 ix86_incoming_stack_boundary
9428 = ix86_minimum_incoming_stack_boundary (false);
9430 /* x86_64 vararg needs 16-byte stack alignment for the register save
9431 area. */
9432 if (TARGET_64BIT
9433 && cfun->stdarg
9434 && crtl->stack_alignment_estimated < 128)
9435 crtl->stack_alignment_estimated = 128;
9438 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9439 needed or an rtx for DRAP otherwise. */
9442 ix86_get_drap_rtx (void)
9444 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9445 crtl->need_drap = true;
9447 if (stack_realign_drap)
9449 /* Assign DRAP to vDRAP and return vDRAP. */
9450 unsigned int regno = find_drap_reg ();
9455 arg_ptr = gen_rtx_REG (Pmode, regno);
9456 crtl->drap_reg = arg_ptr;
9459 drap_vreg = copy_to_reg (arg_ptr);
9463 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9466 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9467 RTX_FRAME_RELATED_P (insn) = 1;
9475 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9478 ix86_internal_arg_pointer (void)
9480 return virtual_incoming_args_rtx;
9483 struct scratch_reg {
9488 /* Return a short-lived scratch register for use on function entry.
9489 In 32-bit mode, it is valid only after the registers are saved
9490 in the prologue. This register must be released by means of
9491 release_scratch_register_on_entry once it is dead. */
9494 get_scratch_register_on_entry (struct scratch_reg *sr)
9502 /* We always use R11 in 64-bit mode. */
9507 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9509 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9510 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9511 int regparm = ix86_function_regparm (fntype, decl);
9513 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9515 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9516 for the static chain register. */
9517 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9518 && drap_regno != AX_REG)
9520 else if (regparm < 2 && drap_regno != DX_REG)
9522 /* ecx is the static chain register. */
9523 else if (regparm < 3 && !fastcall_p && !static_chain_p
9524 && drap_regno != CX_REG)
9526 else if (ix86_save_reg (BX_REG, true))
9528 /* esi is the static chain register. */
9529 else if (!(regparm == 3 && static_chain_p)
9530 && ix86_save_reg (SI_REG, true))
9532 else if (ix86_save_reg (DI_REG, true))
9536 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9541 sr->reg = gen_rtx_REG (Pmode, regno);
9544 rtx insn = emit_insn (gen_push (sr->reg));
9545 RTX_FRAME_RELATED_P (insn) = 1;
9549 /* Release a scratch register obtained from the preceding function. */
9552 release_scratch_register_on_entry (struct scratch_reg *sr)
9556 rtx x, insn = emit_insn (gen_pop (sr->reg));
9558 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9559 RTX_FRAME_RELATED_P (insn) = 1;
9560 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9561 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9562 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9566 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
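/* With the usual default of STACK_CHECK_PROBE_INTERVAL_EXP == 12 (an
   assumption about the generic default, not something this file sets)
   PROBE_INTERVAL is 4096 bytes, i.e. one probe per page.  */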
9568 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9571 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9573 /* We skip the probe for the first interval + a small dope of 4 words and
9574 probe that many bytes past the specified size to maintain a protection
9575 area at the bottom of the stack. */
9576 const int dope = 4 * UNITS_PER_WORD;
9577 rtx size_rtx = GEN_INT (size), last;
9579 /* See if we have a constant small number of probes to generate. If so,
9580 that's the easy case. The run-time loop is made up of 11 insns in the
9581 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9582 for n # of intervals. */
9583 if (size <= 5 * PROBE_INTERVAL)
9585 HOST_WIDE_INT i, adjust;
9586 bool first_probe = true;
9588 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9589 values of N from 1 until it exceeds SIZE. If only one probe is
9590 needed, this will not generate any code. Then adjust and probe
9591 to PROBE_INTERVAL + SIZE. */
9592 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9596 adjust = 2 * PROBE_INTERVAL + dope;
9597 first_probe = false;
9600 adjust = PROBE_INTERVAL;
9602 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9603 plus_constant (stack_pointer_rtx, -adjust)));
9604 emit_stack_probe (stack_pointer_rtx);
9608 adjust = size + PROBE_INTERVAL + dope;
9610 adjust = size + PROBE_INTERVAL - i;
9612 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9613 plus_constant (stack_pointer_rtx, -adjust)));
9614 emit_stack_probe (stack_pointer_rtx);
9616 /* Adjust back to account for the additional first interval. */
9617 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9618 plus_constant (stack_pointer_rtx,
9619 PROBE_INTERVAL + dope)));
9622 /* Otherwise, do the same as above, but in a loop. Note that we must be
9623 extra careful with variables wrapping around because we might be at
9624 the very top (or the very bottom) of the address space and we have
9625 to be able to handle this case properly; in particular, we use an
9626 equality test for the loop condition. */
9629 HOST_WIDE_INT rounded_size;
9630 struct scratch_reg sr;
9632 get_scratch_register_on_entry (&sr);
9635 /* Step 1: round SIZE to the previous multiple of the interval. */
9637 rounded_size = size & -PROBE_INTERVAL;
9640 /* Step 2: compute initial and final value of the loop counter. */
9642 /* SP = SP_0 + PROBE_INTERVAL. */
9643 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9644 plus_constant (stack_pointer_rtx,
9645 - (PROBE_INTERVAL + dope))));
9647 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9648 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9649 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9650 gen_rtx_PLUS (Pmode, sr.reg,
9651 stack_pointer_rtx)));
9656 while (SP != LAST_ADDR)
9658 SP = SP + PROBE_INTERVAL
9662 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9663 values of N from 1 until it is equal to ROUNDED_SIZE. */
9665 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9668 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9669 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9671 if (size != rounded_size)
9673 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9674 plus_constant (stack_pointer_rtx,
9675 rounded_size - size)));
9676 emit_stack_probe (stack_pointer_rtx);
9679 /* Adjust back to account for the additional first interval. */
9680 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9681 plus_constant (stack_pointer_rtx,
9682 PROBE_INTERVAL + dope)));
9684 release_scratch_register_on_entry (&sr);
9687 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9689 /* Even if the stack pointer isn't the CFA register, we need to correctly
9690 describe the adjustments made to it, in particular differentiate the
9691 frame-related ones from the frame-unrelated ones. */
9694 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9695 XVECEXP (expr, 0, 0)
9696 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9697 plus_constant (stack_pointer_rtx, -size));
9698 XVECEXP (expr, 0, 1)
9699 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9700 plus_constant (stack_pointer_rtx,
9701 PROBE_INTERVAL + dope + size));
9702 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9703 RTX_FRAME_RELATED_P (last) = 1;
9705 cfun->machine->fs.sp_offset += size;
9708 /* Make sure nothing is scheduled before we are done. */
9709 emit_insn (gen_blockage ());
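/* Worked example (illustrative, PROBE_INTERVAL == 4096): for
   size == 10000 the constant-probe path above is taken, since
   10000 <= 5 * 4096.  The stack pointer is moved down and probed in
   4096-byte steps past the dope, and the final adjustment moves SP
   back up by PROBE_INTERVAL + dope, so the net decrement is exactly
   the requested 10000 bytes.  */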
9712 /* Adjust the stack pointer up to REG while probing it. */
9715 output_adjust_stack_and_probe (rtx reg)
9717 static int labelno = 0;
9718 char loop_lab[32], end_lab[32];
9721 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9722 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9724 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9726 /* Jump to END_LAB if SP == LAST_ADDR. */
9727 xops[0] = stack_pointer_rtx;
9729 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9730 fputs ("\tje\t", asm_out_file);
9731 assemble_name_raw (asm_out_file, end_lab);
9732 fputc ('\n', asm_out_file);
9734 /* SP = SP + PROBE_INTERVAL (the stack grows downward, hence the sub below). */
9735 xops[1] = GEN_INT (PROBE_INTERVAL);
9736 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9739 xops[1] = const0_rtx;
9740 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9742 fprintf (asm_out_file, "\tjmp\t");
9743 assemble_name_raw (asm_out_file, loop_lab);
9744 fputc ('\n', asm_out_file);
9746 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
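/* The emitted loop has roughly this shape (a sketch in 32-bit AT&T
   syntax, with %eax standing in for the scratch register that holds
   LAST_ADDR):

       .LPSRL0:
               cmpl    %eax, %esp
               je      .LPSRE0
               subl    $4096, %esp
               orl     $0, (%esp)
               jmp     .LPSRL0
       .LPSRE0:
*/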
9751 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9752 inclusive. These are offsets from the current stack pointer. */
9755 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9757 /* See if we have a constant small number of probes to generate. If so,
9758 that's the easy case. The run-time loop is made up of 7 insns in the
9759 generic case while the compile-time loop is made up of n insns for n #
9760 of probes. */
9761 if (size <= 7 * PROBE_INTERVAL)
9765 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9766 it exceeds SIZE. If only one probe is needed, this will not
9767 generate any code. Then probe at FIRST + SIZE. */
9768 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9769 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9771 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9774 /* Otherwise, do the same as above, but in a loop. Note that we must be
9775 extra careful with variables wrapping around because we might be at
9776 the very top (or the very bottom) of the address space and we have
9777 to be able to handle this case properly; in particular, we use an
9778 equality test for the loop condition. */
9781 HOST_WIDE_INT rounded_size, last;
9782 struct scratch_reg sr;
9784 get_scratch_register_on_entry (&sr);
9787 /* Step 1: round SIZE to the previous multiple of the interval. */
9789 rounded_size = size & -PROBE_INTERVAL;
9792 /* Step 2: compute initial and final value of the loop counter. */
9794 /* TEST_OFFSET = FIRST. */
9795 emit_move_insn (sr.reg, GEN_INT (-first));
9797 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9798 last = first + rounded_size;
9803 while (TEST_ADDR != LAST_ADDR)
9805 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9809 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9810 until it is equal to ROUNDED_SIZE. */
9812 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9815 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9816 that SIZE is equal to ROUNDED_SIZE. */
9818 if (size != rounded_size)
9819 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9822 rounded_size - size));
9824 release_scratch_register_on_entry (&sr);
9827 /* Make sure nothing is scheduled before we are done. */
9828 emit_insn (gen_blockage ());
9831 /* Probe a range of stack addresses from REG to END, inclusive. These are
9832 offsets from the current stack pointer. */
9835 output_probe_stack_range (rtx reg, rtx end)
9837 static int labelno = 0;
9838 char loop_lab[32], end_lab[32];
9841 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9842 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9844 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9846 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9849 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9850 fputs ("\tje\t", asm_out_file);
9851 assemble_name_raw (asm_out_file, end_lab);
9852 fputc ('\n', asm_out_file);
9854 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL (advancing down the stack, hence the sub). */
9855 xops[1] = GEN_INT (PROBE_INTERVAL);
9856 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9858 /* Probe at TEST_ADDR. */
9859 xops[0] = stack_pointer_rtx;
9861 xops[2] = const0_rtx;
9862 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9864 fprintf (asm_out_file, "\tjmp\t");
9865 assemble_name_raw (asm_out_file, loop_lab);
9866 fputc ('\n', asm_out_file);
9868 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9873 /* Finalize the stack_realign_needed flag, which guides the
9874 prologue/epilogue so that it is generated in the correct form. */
9876 ix86_finalize_stack_realign_flags (void)
9878 /* Check if stack realignment is really needed after reload, and
9879 store the result in cfun. */
9880 unsigned int incoming_stack_boundary
9881 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9882 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9883 unsigned int stack_realign = (incoming_stack_boundary
9884 < (current_function_is_leaf
9885 ? crtl->max_used_stack_slot_alignment
9886 : crtl->stack_alignment_needed));
9888 if (crtl->stack_realign_finalized)
9890 /* After stack_realign_needed is finalized, we can no longer
9891 update it. */
9892 gcc_assert (crtl->stack_realign_needed == stack_realign);
9896 crtl->stack_realign_needed = stack_realign;
9897 crtl->stack_realign_finalized = true;
9901 /* Expand the prologue into a bunch of separate insns. */
9904 ix86_expand_prologue (void)
9906 struct machine_function *m = cfun->machine;
9909 struct ix86_frame frame;
9910 HOST_WIDE_INT allocate;
9911 bool int_registers_saved;
9913 ix86_finalize_stack_realign_flags ();
9915 /* DRAP should not coexist with stack_realign_fp. */
9916 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9918 memset (&m->fs, 0, sizeof (m->fs));
9920 /* Initialize CFA state for before the prologue. */
9921 m->fs.cfa_reg = stack_pointer_rtx;
9922 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9924 /* Track SP offset to the CFA. We continue tracking this after we've
9925 swapped the CFA register away from SP. In the case of re-alignment
9926 this is fudged; we're interested in offsets within the local frame. */
9927 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9928 m->fs.sp_valid = true;
9930 ix86_compute_frame_layout (&frame);
9932 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9934 /* We should have already generated an error for any use of
9935 ms_hook on a nested function. */
9936 gcc_checking_assert (!ix86_static_chain_on_stack);
9938 /* Check if profiling is active and whether we shall use the
9939 profiling-before-prologue variant. If so, sorry. */
9940 if (crtl->profile && flag_fentry != 0)
9941 sorry ("ms_hook_prologue attribute isn%'t compatible "
9942 "with -mfentry for 32-bit");
9944 /* In ix86_asm_output_function_label we emitted:
9945 8b ff movl.s %edi,%edi
9946 55 push %ebp
9947 8b ec movl.s %esp,%ebp
9949 This matches the hookable function prologue in Win32 API
9950 functions in Microsoft Windows XP Service Pack 2 and newer.
9951 Wine uses this to enable Windows apps to hook the Win32 API
9952 functions provided by Wine.
9954 What that means is that we've already set up the frame pointer. */
9956 if (frame_pointer_needed
9957 && !(crtl->drap_reg && crtl->stack_realign_needed))
9961 /* We've decided to use the frame pointer already set up.
9962 Describe this to the unwinder by pretending that both
9963 push and mov insns happen right here.
9965 Putting the unwind info here at the end of the ms_hook
9966 is done so that we can make absolutely certain we get
9967 the required byte sequence at the start of the function,
9968 rather than relying on an assembler that can produce
9969 the exact encoding required.
9971 However it does mean (in the unpatched case) that we have
9972 a 1 insn window where the asynchronous unwind info is
9973 incorrect. However, if we placed the unwind info at
9974 its correct location we would have incorrect unwind info
9975 in the patched case. That is probably all moot, since
9976 I don't expect Wine to generate dwarf2 unwind info for the
9977 system libraries that use this feature. */
9979 insn = emit_insn (gen_blockage ());
9981 push = gen_push (hard_frame_pointer_rtx);
9982 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9984 RTX_FRAME_RELATED_P (push) = 1;
9985 RTX_FRAME_RELATED_P (mov) = 1;
9987 RTX_FRAME_RELATED_P (insn) = 1;
9988 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9989 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9991 /* Note that gen_push incremented m->fs.cfa_offset, even
9992 though we didn't emit the push insn here. */
9993 m->fs.cfa_reg = hard_frame_pointer_rtx;
9994 m->fs.fp_offset = m->fs.cfa_offset;
9995 m->fs.fp_valid = true;
9999 /* The frame pointer is not needed so pop %ebp again.
10000 This leaves us with a pristine state. */
10001 emit_insn (gen_pop (hard_frame_pointer_rtx));
10005 /* The first insn of a function that accepts its static chain on the
10006 stack is to push the register that would be filled in by a direct
10007 call. This insn will be skipped by the trampoline. */
10008 else if (ix86_static_chain_on_stack)
10010 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10011 emit_insn (gen_blockage ());
10013 /* We don't want to interpret this push insn as a register save,
10014 only as a stack adjustment. The real copy of the register as
10015 a save will be done later, if needed. */
10016 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10017 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10018 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10019 RTX_FRAME_RELATED_P (insn) = 1;
10022 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10023 DRAP is needed and stack realignment is really needed after reload. */
10024 if (stack_realign_drap)
10026 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10028 /* Only need to push parameter pointer reg if it is caller saved. */
10029 if (!call_used_regs[REGNO (crtl->drap_reg)])
10031 /* Push arg pointer reg */
10032 insn = emit_insn (gen_push (crtl->drap_reg));
10033 RTX_FRAME_RELATED_P (insn) = 1;
10036 /* Grab the argument pointer. */
10037 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10038 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10039 RTX_FRAME_RELATED_P (insn) = 1;
10040 m->fs.cfa_reg = crtl->drap_reg;
10041 m->fs.cfa_offset = 0;
10043 /* Align the stack. */
10044 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10046 GEN_INT (-align_bytes)));
10047 RTX_FRAME_RELATED_P (insn) = 1;
10049 /* Replicate the return address on the stack so that the return
10050 address can be reached via the (argp - 1) slot. This is needed
10051 to implement macro RETURN_ADDR_RTX and intrinsic function
10052 expand_builtin_return_addr etc. */
10053 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10054 t = gen_frame_mem (Pmode, t);
10055 insn = emit_insn (gen_push (t));
10056 RTX_FRAME_RELATED_P (insn) = 1;
10058 /* For the purposes of frame and register save area addressing,
10059 we've started over with a new frame. */
10060 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10061 m->fs.realigned = true;
10064 if (frame_pointer_needed && !m->fs.fp_valid)
10066 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10067 slower on all targets. Also sdb doesn't like it. */
10068 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10069 RTX_FRAME_RELATED_P (insn) = 1;
10071 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10073 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10074 RTX_FRAME_RELATED_P (insn) = 1;
10076 if (m->fs.cfa_reg == stack_pointer_rtx)
10077 m->fs.cfa_reg = hard_frame_pointer_rtx;
10078 m->fs.fp_offset = m->fs.sp_offset;
10079 m->fs.fp_valid = true;
10083 int_registers_saved = (frame.nregs == 0);
10085 if (!int_registers_saved)
10087 /* If saving registers via PUSH, do so now. */
10088 if (!frame.save_regs_using_mov)
10090 ix86_emit_save_regs ();
10091 int_registers_saved = true;
10092 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10095 /* When using the red zone we may start register saving before allocating
10096 the stack frame, saving one cycle of the prologue. However, avoid
10097 doing this if we have to probe the stack; at least on x86_64 the
10098 stack probe can turn into a call that clobbers a red zone location. */
10099 else if (ix86_using_red_zone ()
10100 && (! TARGET_STACK_PROBE
10101 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10103 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10104 int_registers_saved = true;
10108 if (stack_realign_fp)
10110 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10111 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10113 /* The computation of the size of the re-aligned stack frame means
10114 that we must allocate the size of the register save area before
10115 performing the actual alignment. Otherwise we cannot guarantee
10116 that there's enough storage above the realignment point. */
10117 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10118 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10119 GEN_INT (m->fs.sp_offset
10120 - frame.sse_reg_save_offset),
10123 /* Align the stack. */
10124 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10126 GEN_INT (-align_bytes)));
10128 /* For the purposes of register save area addressing, the stack
10129 pointer is no longer valid. As for the value of sp_offset,
10130 see ix86_compute_frame_layout, which we need to match in order
10131 to pass verification of stack_pointer_offset at the end. */
10132 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10133 m->fs.sp_valid = false;
10136 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10138 if (flag_stack_usage_info)
10140 /* We start to count from ARG_POINTER. */
10141 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10143 /* If it was realigned, take into account the fake frame. */
10144 if (stack_realign_drap)
10146 if (ix86_static_chain_on_stack)
10147 stack_size += UNITS_PER_WORD;
10149 if (!call_used_regs[REGNO (crtl->drap_reg)])
10150 stack_size += UNITS_PER_WORD;
10152 /* This over-estimates by 1 minimal-stack-alignment-unit but
10153 mitigates that by counting in the new return address slot. */
10154 current_function_dynamic_stack_size
10155 += crtl->stack_alignment_needed / BITS_PER_UNIT;
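	  /* Worked example (illustrative): with a 32-byte requested
	     alignment on x86_64, 32 bytes are added above, while at
	     most 32 - 8 = 24 bytes of padding can actually appear;
	     the remaining word is accounted to the return address
	     copy pushed for the DRAP.  */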
10158 current_function_static_stack_size = stack_size;
10161 /* The stack has already been decremented by the instruction calling us
10162 so probe if the size is non-negative to preserve the protection area. */
10163 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10165 /* We expect the registers to be saved when probes are used. */
10166 gcc_assert (int_registers_saved);
10168 if (STACK_CHECK_MOVING_SP)
10170 ix86_adjust_stack_and_probe (allocate);
10175 HOST_WIDE_INT size = allocate;
10177 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10178 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10180 if (TARGET_STACK_PROBE)
10181 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10183 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10189 else if (!ix86_target_stack_probe ()
10190 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10192 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10193 GEN_INT (-allocate), -1,
10194 m->fs.cfa_reg == stack_pointer_rtx);
10198 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10200 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10202 bool eax_live = false;
10203 bool r10_live = false;
10206 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10207 if (!TARGET_64BIT_MS_ABI)
10208 eax_live = ix86_eax_live_at_start_p ();
10212 emit_insn (gen_push (eax));
10213 allocate -= UNITS_PER_WORD;
10217 r10 = gen_rtx_REG (Pmode, R10_REG);
10218 emit_insn (gen_push (r10));
10219 allocate -= UNITS_PER_WORD;
10222 emit_move_insn (eax, GEN_INT (allocate));
10223 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10225 /* Use the fact that AX still contains ALLOCATE. */
10226 adjust_stack_insn = (TARGET_64BIT
10227 ? gen_pro_epilogue_adjust_stack_di_sub
10228 : gen_pro_epilogue_adjust_stack_si_sub);
10230 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10231 stack_pointer_rtx, eax));
10233 /* Note that SEH directives need to continue tracking the stack
10234 pointer even after the frame pointer has been set up. */
10235 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10237 if (m->fs.cfa_reg == stack_pointer_rtx)
10238 m->fs.cfa_offset += allocate;
10240 RTX_FRAME_RELATED_P (insn) = 1;
10241 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10242 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10243 plus_constant (stack_pointer_rtx,
10246 m->fs.sp_offset += allocate;
10248 if (r10_live && eax_live)
10250 t = choose_baseaddr (m->fs.sp_offset - allocate);
10251 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10252 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10253 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10255 else if (eax_live || r10_live)
10257 t = choose_baseaddr (m->fs.sp_offset - allocate);
10258 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10261 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
  /* If we haven't already set up the frame pointer, do so now.  */
10264 if (frame_pointer_needed && !m->fs.fp_valid)
10266 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10267 GEN_INT (frame.stack_pointer_offset
10268 - frame.hard_frame_pointer_offset));
10269 insn = emit_insn (insn);
10270 RTX_FRAME_RELATED_P (insn) = 1;
10271 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10273 if (m->fs.cfa_reg == stack_pointer_rtx)
10274 m->fs.cfa_reg = hard_frame_pointer_rtx;
10275 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10276 m->fs.fp_valid = true;
10279 if (!int_registers_saved)
10280 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10281 if (frame.nsseregs)
10282 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10284 pic_reg_used = false;
10285 if (pic_offset_table_rtx
10286 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10289 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10291 if (alt_pic_reg_used != INVALID_REGNUM)
10292 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10294 pic_reg_used = true;
10301 if (ix86_cmodel == CM_LARGE_PIC)
10303 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10304 rtx label = gen_label_rtx ();
10305 emit_label (label);
10306 LABEL_PRESERVE_P (label) = 1;
10307 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10308 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10309 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10310 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10311 pic_offset_table_rtx, tmp_reg));
10314 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10318 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10319 RTX_FRAME_RELATED_P (insn) = 1;
10320 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
  /* In the pic_reg_used case, make sure that the GOT load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across the
     mcount call is emitted in generic code after the
     NOTE_INSN_PROLOGUE_END note.  */
10328 if (crtl->profile && !flag_fentry && pic_reg_used)
10329 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10331 if (crtl->drap_reg && !crtl->stack_realign_needed)
      /* The vDRAP is set up, but after reload it turns out that stack
	 realignment isn't necessary; emit the prologue to set up the
	 DRAP without the stack realignment adjustment.  */
10336 t = choose_baseaddr (0);
10337 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
  /* Prevent instructions from being scheduled into the register save
     push sequence when access to the red zone area is done through the
     frame pointer.  The offset between the frame pointer and the stack
     pointer is calculated relative to the value of the stack pointer
     at the end of the function prologue, and moving instructions that
     access the red zone area via the frame pointer inside the push
     sequence violates this assumption.  */
10346 if (frame_pointer_needed && frame.red_zone_size)
10347 emit_insn (gen_memory_blockage ());
10349 /* Emit cld instruction if stringops are used in the function. */
10350 if (TARGET_CLD && ix86_current_function_needs_cld)
10351 emit_insn (gen_cld ());
10353 /* SEH requires that the prologue end within 256 bytes of the start of
10354 the function. Prevent instruction schedules that would extend that.
10355 Further, prevent alloca modifications to the stack pointer from being
10356 combined with prologue modifications. */
10358 emit_insn (gen_prologue_use (stack_pointer_rtx));
10361 /* Emit code to restore REG using a POP insn. */
10364 ix86_emit_restore_reg_using_pop (rtx reg)
10366 struct machine_function *m = cfun->machine;
10367 rtx insn = emit_insn (gen_pop (reg));
10369 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10370 m->fs.sp_offset -= UNITS_PER_WORD;
10372 if (m->fs.cfa_reg == crtl->drap_reg
10373 && REGNO (reg) == REGNO (crtl->drap_reg))
10375 /* Previously we'd represented the CFA as an expression
10376 like *(%ebp - 8). We've just popped that value from
10377 the stack, which means we need to reset the CFA to
10378 the drap register. This will remain until we restore
10379 the stack pointer. */
10380 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10381 RTX_FRAME_RELATED_P (insn) = 1;
10383 /* This means that the DRAP register is valid for addressing too. */
10384 m->fs.drap_valid = true;
10388 if (m->fs.cfa_reg == stack_pointer_rtx)
10390 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10391 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10392 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10393 RTX_FRAME_RELATED_P (insn) = 1;
10395 m->fs.cfa_offset -= UNITS_PER_WORD;
10398 /* When the frame pointer is the CFA, and we pop it, we are
10399 swapping back to the stack pointer as the CFA. This happens
10400 for stack frames that don't allocate other data, so we assume
10401 the stack pointer is now pointing at the return address, i.e.
10402 the function entry state, which makes the offset be 1 word. */
10403 if (reg == hard_frame_pointer_rtx)
10405 m->fs.fp_valid = false;
10406 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10408 m->fs.cfa_reg = stack_pointer_rtx;
10409 m->fs.cfa_offset -= UNITS_PER_WORD;
10411 add_reg_note (insn, REG_CFA_DEF_CFA,
10412 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10413 GEN_INT (m->fs.cfa_offset)));
10414 RTX_FRAME_RELATED_P (insn) = 1;
10419 /* Emit code to restore saved registers using POP insns. */
10422 ix86_emit_restore_regs_using_pop (void)
10424 unsigned int regno;
10426 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10427 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10428 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10431 /* Emit code and notes for the LEAVE instruction. */
10434 ix86_emit_leave (void)
10436 struct machine_function *m = cfun->machine;
10437 rtx insn = emit_insn (ix86_gen_leave ());
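  /* leave is equivalent to the sequence
	movl	%ebp, %esp
	popl	%ebp
     (movq/popq in 64-bit mode), which motivates the state updates
     below: the stack pointer becomes valid one word above the old
     frame pointer location, and the frame pointer is invalidated.  */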
10439 ix86_add_queued_cfa_restore_notes (insn);
10441 gcc_assert (m->fs.fp_valid);
10442 m->fs.sp_valid = true;
10443 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10444 m->fs.fp_valid = false;
10446 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10448 m->fs.cfa_reg = stack_pointer_rtx;
10449 m->fs.cfa_offset = m->fs.sp_offset;
10451 add_reg_note (insn, REG_CFA_DEF_CFA,
10452 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10453 RTX_FRAME_RELATED_P (insn) = 1;
10454 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10459 /* Emit code to restore saved registers using MOV insns.
10460 First register is restored from CFA - CFA_OFFSET. */
10462 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10463 bool maybe_eh_return)
10465 struct machine_function *m = cfun->machine;
10466 unsigned int regno;
10468 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10469 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10471 rtx reg = gen_rtx_REG (Pmode, regno);
10474 mem = choose_baseaddr (cfa_offset);
10475 mem = gen_frame_mem (Pmode, mem);
10476 insn = emit_move_insn (reg, mem);
10478 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  /* Previously we'd represented the CFA as an expression
	     like *(%ebp - 8).  We've just restored that value from
	     the stack, which means we need to reset the CFA to
	     the drap register.  This will remain until we restore
	     the stack pointer.  */
10485 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10486 RTX_FRAME_RELATED_P (insn) = 1;
10488 /* This means that the DRAP register is valid for addressing. */
10489 m->fs.drap_valid = true;
10492 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10494 cfa_offset -= UNITS_PER_WORD;
10498 /* Emit code to restore saved registers using MOV insns.
10499 First register is restored from CFA - CFA_OFFSET. */
10501 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10502 bool maybe_eh_return)
10504 unsigned int regno;
10506 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10507 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10509 rtx reg = gen_rtx_REG (V4SFmode, regno);
10512 mem = choose_baseaddr (cfa_offset);
10513 mem = gen_rtx_MEM (V4SFmode, mem);
10514 set_mem_align (mem, 128);
10515 emit_move_insn (reg, mem);
10517 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10523 /* Restore function stack, frame, and registers. */
10526 ix86_expand_epilogue (int style)
10528 struct machine_function *m = cfun->machine;
10529 struct machine_frame_state frame_state_save = m->fs;
10530 struct ix86_frame frame;
10531 bool restore_regs_via_mov;
10534 ix86_finalize_stack_realign_flags ();
10535 ix86_compute_frame_layout (&frame);
10537 m->fs.sp_valid = (!frame_pointer_needed
10538 || (current_function_sp_is_unchanging
10539 && !stack_realign_fp));
10540 gcc_assert (!m->fs.sp_valid
10541 || m->fs.sp_offset == frame.stack_pointer_offset);
10543 /* The FP must be valid if the frame pointer is present. */
10544 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10545 gcc_assert (!m->fs.fp_valid
10546 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10548 /* We must have *some* valid pointer to the stack frame. */
10549 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10551 /* The DRAP is never valid at this point. */
10552 gcc_assert (!m->fs.drap_valid);
10554 /* See the comment about red zone and frame
10555 pointer usage in ix86_expand_prologue. */
10556 if (frame_pointer_needed && frame.red_zone_size)
10557 emit_insn (gen_memory_blockage ());
10559 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10560 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10562 /* Determine the CFA offset of the end of the red-zone. */
10563 m->fs.red_zone_offset = 0;
10564 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10566 /* The red-zone begins below the return address. */
10567 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
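      /* E.g. on x86_64 this yields 128 + 8 = 136: the red zone ends
	 136 bytes below the CFA, since it begins just below the word
	 holding the return address.  */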
10569 /* When the register save area is in the aligned portion of
10570 the stack, determine the maximum runtime displacement that
10571 matches up with the aligned frame. */
10572 if (stack_realign_drap)
10573 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10577 /* Special care must be taken for the normal return case of a function
10578 using eh_return: the eax and edx registers are marked as saved, but
10579 not restored along this path. Adjust the save location to match. */
10580 if (crtl->calls_eh_return && style != 2)
10581 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10583 /* EH_RETURN requires the use of moves to function properly. */
10584 if (crtl->calls_eh_return)
10585 restore_regs_via_mov = true;
10586 /* SEH requires the use of pops to identify the epilogue. */
10587 else if (TARGET_SEH)
10588 restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid, then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
10592 else if (!m->fs.sp_valid && frame.nregs <= 1)
10593 restore_regs_via_mov = true;
10594 else if (TARGET_EPILOGUE_USING_MOVE
10595 && cfun->machine->use_fast_prologue_epilogue
10596 && (frame.nregs > 1
10597 || m->fs.sp_offset != frame.reg_save_offset))
10598 restore_regs_via_mov = true;
10599 else if (frame_pointer_needed
10601 && m->fs.sp_offset != frame.reg_save_offset)
10602 restore_regs_via_mov = true;
10603 else if (frame_pointer_needed
10604 && TARGET_USE_LEAVE
10605 && cfun->machine->use_fast_prologue_epilogue
10606 && frame.nregs == 1)
10607 restore_regs_via_mov = true;
10609 restore_regs_via_mov = false;
10611 if (restore_regs_via_mov || frame.nsseregs)
10613 /* Ensure that the entire register save area is addressable via
10614 the stack pointer, if we will restore via sp. */
10616 && m->fs.sp_offset > 0x7fffffff
10617 && !(m->fs.fp_valid || m->fs.drap_valid)
10618 && (frame.nsseregs + frame.nregs) != 0)
10620 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10621 GEN_INT (m->fs.sp_offset
10622 - frame.sse_reg_save_offset),
10624 m->fs.cfa_reg == stack_pointer_rtx);
10628 /* If there are any SSE registers to restore, then we have to do it
10629 via moves, since there's obviously no pop for SSE regs. */
10630 if (frame.nsseregs)
10631 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10634 if (restore_regs_via_mov)
10639 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10641 /* eh_return epilogues need %ecx added to the stack pointer. */
10644 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10646 /* Stack align doesn't work with eh_return. */
10647 gcc_assert (!stack_realign_drap);
      /* Neither do regparm nested functions.  */
10649 gcc_assert (!ix86_static_chain_on_stack);
10651 if (frame_pointer_needed)
10653 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10654 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10655 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10657 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10658 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10660 /* Note that we use SA as a temporary CFA, as the return
10661 address is at the proper place relative to it. We
10662 pretend this happens at the FP restore insn because
10663 prior to this insn the FP would be stored at the wrong
10664 offset relative to SA, and after this insn we have no
10665 other reasonable register to use for the CFA. We don't
10666 bother resetting the CFA to the SP for the duration of
10667 the return insn. */
10668 add_reg_note (insn, REG_CFA_DEF_CFA,
10669 plus_constant (sa, UNITS_PER_WORD));
10670 ix86_add_queued_cfa_restore_notes (insn);
10671 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10672 RTX_FRAME_RELATED_P (insn) = 1;
10674 m->fs.cfa_reg = sa;
10675 m->fs.cfa_offset = UNITS_PER_WORD;
10676 m->fs.fp_valid = false;
10678 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10679 const0_rtx, style, false);
10683 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10684 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10685 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10686 ix86_add_queued_cfa_restore_notes (insn);
10688 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10689 if (m->fs.cfa_offset != UNITS_PER_WORD)
10691 m->fs.cfa_offset = UNITS_PER_WORD;
10692 add_reg_note (insn, REG_CFA_DEF_CFA,
10693 plus_constant (stack_pointer_rtx,
10695 RTX_FRAME_RELATED_P (insn) = 1;
10698 m->fs.sp_offset = UNITS_PER_WORD;
10699 m->fs.sp_valid = true;
10704 /* SEH requires that the function end with (1) a stack adjustment
10705 if necessary, (2) a sequence of pops, and (3) a return or
10706 jump instruction. Prevent insns from the function body from
10707 being scheduled into this sequence. */
      /* Prevent a catch region from being adjacent to the standard
	 epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda nor
	 several other flags that would be interesting to test are
	 set up yet.  */
10714 if (flag_non_call_exceptions)
10715 emit_insn (gen_nops (const1_rtx));
10717 emit_insn (gen_blockage ());
10720 /* First step is to deallocate the stack frame so that we can
10721 pop the registers. */
10722 if (!m->fs.sp_valid)
10724 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10725 GEN_INT (m->fs.fp_offset
10726 - frame.reg_save_offset),
10729 else if (m->fs.sp_offset != frame.reg_save_offset)
10731 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10732 GEN_INT (m->fs.sp_offset
10733 - frame.reg_save_offset),
10735 m->fs.cfa_reg == stack_pointer_rtx);
10738 ix86_emit_restore_regs_using_pop ();
  /* If we used a frame pointer and haven't already got rid of it,
     then pop it.  */
10743 if (m->fs.fp_valid)
10745 /* If the stack pointer is valid and pointing at the frame
10746 pointer store address, then we only need a pop. */
10747 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10748 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10749 /* Leave results in shorter dependency chains on CPUs that are
10750 able to grok it fast. */
10751 else if (TARGET_USE_LEAVE
10752 || optimize_function_for_size_p (cfun)
10753 || !cfun->machine->use_fast_prologue_epilogue)
10754 ix86_emit_leave ();
10757 pro_epilogue_adjust_stack (stack_pointer_rtx,
10758 hard_frame_pointer_rtx,
10759 const0_rtx, style, !using_drap);
10760 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10766 int param_ptr_offset = UNITS_PER_WORD;
10769 gcc_assert (stack_realign_drap);
10771 if (ix86_static_chain_on_stack)
10772 param_ptr_offset += UNITS_PER_WORD;
10773 if (!call_used_regs[REGNO (crtl->drap_reg)])
10774 param_ptr_offset += UNITS_PER_WORD;
10776 insn = emit_insn (gen_rtx_SET
10777 (VOIDmode, stack_pointer_rtx,
10778 gen_rtx_PLUS (Pmode,
10780 GEN_INT (-param_ptr_offset))));
10781 m->fs.cfa_reg = stack_pointer_rtx;
10782 m->fs.cfa_offset = param_ptr_offset;
10783 m->fs.sp_offset = param_ptr_offset;
10784 m->fs.realigned = false;
10786 add_reg_note (insn, REG_CFA_DEF_CFA,
10787 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10788 GEN_INT (param_ptr_offset)));
10789 RTX_FRAME_RELATED_P (insn) = 1;
10791 if (!call_used_regs[REGNO (crtl->drap_reg)])
10792 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10795 /* At this point the stack pointer must be valid, and we must have
10796 restored all of the registers. We may not have deallocated the
10797 entire stack frame. We've delayed this until now because it may
10798 be possible to merge the local stack deallocation with the
10799 deallocation forced by ix86_static_chain_on_stack. */
10800 gcc_assert (m->fs.sp_valid);
10801 gcc_assert (!m->fs.fp_valid);
10802 gcc_assert (!m->fs.realigned);
10803 if (m->fs.sp_offset != UNITS_PER_WORD)
10805 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10806 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10810 ix86_add_queued_cfa_restore_notes (get_last_insn ());
10812 /* Sibcall epilogues don't want a return instruction. */
10815 m->fs = frame_state_save;
10819 /* Emit vzeroupper if needed. */
10820 if (TARGET_VZEROUPPER
10821 && !TREE_THIS_VOLATILE (cfun->decl)
10822 && !cfun->machine->caller_return_avx256_p)
10823 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10825 if (crtl->args.pops_args && crtl->args.size)
10827 rtx popc = GEN_INT (crtl->args.pops_args);
10829 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10830 address, do explicit add, and jump indirectly to the caller. */
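      /* Concretely, the sequence emitted below is (a sketch; the
	 constant and scratch register are illustrative):
		popl	%ecx
		addl	$pops_args, %esp
		jmp	*%ecx
       */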
10832 if (crtl->args.pops_args >= 65536)
10834 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10837 /* There is no "pascal" calling convention in any 64bit ABI. */
10838 gcc_assert (!TARGET_64BIT);
10840 insn = emit_insn (gen_pop (ecx));
10841 m->fs.cfa_offset -= UNITS_PER_WORD;
10842 m->fs.sp_offset -= UNITS_PER_WORD;
10844 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10845 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10846 add_reg_note (insn, REG_CFA_REGISTER,
10847 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10848 RTX_FRAME_RELATED_P (insn) = 1;
10850 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10852 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10855 emit_jump_insn (gen_simple_return_pop_internal (popc));
10858 emit_jump_insn (gen_simple_return_internal ());
10860 /* Restore the state back to the state from the prologue,
10861 so that it's correct for the next epilogue. */
10862 m->fs = frame_state_save;
10865 /* Reset from the function's potential modifications. */
10868 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10869 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10871 if (pic_offset_table_rtx)
10872 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10874 /* Mach-O doesn't support labels at the end of objects, so if
10875 it looks like we might want one, insert a NOP. */
10877 rtx insn = get_last_insn ();
10880 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10881 insn = PREV_INSN (insn);
10885 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10886 fputs ("\tnop\n", file);
10892 /* Return a scratch register to use in the split stack prologue. The
   split stack prologue is used for -fsplit-stack.  It consists of the
   first instructions in the function, even before the regular prologue.
10895 The scratch register can be any caller-saved register which is not
10896 used for parameters or for the static chain. */
10898 static unsigned int
10899 split_stack_prologue_scratch_regno (void)
10908 is_fastcall = (lookup_attribute ("fastcall",
10909 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10911 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10915 if (DECL_STATIC_CHAIN (cfun->decl))
10917 sorry ("-fsplit-stack does not support fastcall with "
10918 "nested function");
10919 return INVALID_REGNUM;
10923 else if (regparm < 3)
10925 if (!DECL_STATIC_CHAIN (cfun->decl))
10931 sorry ("-fsplit-stack does not support 2 register "
10932 " parameters for a nested function");
10933 return INVALID_REGNUM;
10940 /* FIXME: We could make this work by pushing a register
10941 around the addition and comparison. */
10942 sorry ("-fsplit-stack does not support 3 register parameters");
10943 return INVALID_REGNUM;
/* A SYMBOL_REF for the function which allocates new stack space for
   split stack.  */
10951 static GTY(()) rtx split_stack_fn;
/* A SYMBOL_REF for the more stack function when using the large
   model.  */
10956 static GTY(()) rtx split_stack_fn_large;
10958 /* Handle -fsplit-stack. These are the first instructions in the
10959 function, even before the regular prologue. */
10962 ix86_expand_split_stack_prologue (void)
10964 struct ix86_frame frame;
10965 HOST_WIDE_INT allocate;
10966 unsigned HOST_WIDE_INT args_size;
10967 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10968 rtx scratch_reg = NULL_RTX;
10969 rtx varargs_label = NULL_RTX;
10972 gcc_assert (flag_split_stack && reload_completed);
10974 ix86_finalize_stack_realign_flags ();
10975 ix86_compute_frame_layout (&frame);
10976 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10978 /* This is the label we will branch to if we have enough stack
10979 space. We expect the basic block reordering pass to reverse this
10980 branch if optimizing, so that we branch in the unlikely case. */
10981 label = gen_label_rtx ();
10983 /* We need to compare the stack pointer minus the frame size with
10984 the stack boundary in the TCB. The stack boundary always gives
10985 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10986 can compare directly. Otherwise we need to do an addition. */
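  /* Schematically (a sketch; the segment register and limit offset are
     target-specific and come from the UNSPEC_STACK_CHECK address,
     e.g. %fs on x86_64, %gs on 32-bit GNU/Linux):
	cmpq	%fs:<limit>, %rsp
	jae	.Lhave_enough
     with %rsp first copied into a scratch register and decremented by
     the frame size when the frame exceeds SPLIT_STACK_AVAILABLE.  */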
10988 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10989 UNSPEC_STACK_CHECK);
10990 limit = gen_rtx_CONST (Pmode, limit);
10991 limit = gen_rtx_MEM (Pmode, limit);
10992 if (allocate < SPLIT_STACK_AVAILABLE)
10993 current = stack_pointer_rtx;
10996 unsigned int scratch_regno;
10999 /* We need a scratch register to hold the stack pointer minus
11000 the required frame size. Since this is the very start of the
11001 function, the scratch register can be any caller-saved
11002 register which is not used for parameters. */
11003 offset = GEN_INT (- allocate);
11004 scratch_regno = split_stack_prologue_scratch_regno ();
11005 if (scratch_regno == INVALID_REGNUM)
11007 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11008 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11010 /* We don't use ix86_gen_add3 in this case because it will
11011 want to split to lea, but when not optimizing the insn
11012 will not be split after this point. */
11013 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11014 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11019 emit_move_insn (scratch_reg, offset);
11020 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11021 stack_pointer_rtx));
11023 current = scratch_reg;
11026 ix86_expand_branch (GEU, current, limit, label);
11027 jump_insn = get_last_insn ();
11028 JUMP_LABEL (jump_insn) = label;
11030 /* Mark the jump as very likely to be taken. */
11031 add_reg_note (jump_insn, REG_BR_PROB,
11032 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
11034 if (split_stack_fn == NULL_RTX)
11035 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11036 fn = split_stack_fn;
11038 /* Get more stack space. We pass in the desired stack space and the
11039 size of the arguments to copy to the new stack. In 32-bit mode
11040 we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and r11.  */
11043 allocate_rtx = GEN_INT (allocate);
11044 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11045 call_fusage = NULL_RTX;
11050 reg10 = gen_rtx_REG (Pmode, R10_REG);
11051 reg11 = gen_rtx_REG (Pmode, R11_REG);
11053 /* If this function uses a static chain, it will be in %r10.
11054 Preserve it across the call to __morestack. */
11055 if (DECL_STATIC_CHAIN (cfun->decl))
11059 rax = gen_rtx_REG (Pmode, AX_REG);
11060 emit_move_insn (rax, reg10);
11061 use_reg (&call_fusage, rax);
11064 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11066 HOST_WIDE_INT argval;
11068 /* When using the large model we need to load the address
11069 into a register, and we've run out of registers. So we
11070 switch to a different calling convention, and we call a
11071 different function: __morestack_large. We pass the
11072 argument size in the upper 32 bits of r10 and pass the
11073 frame size in the lower 32 bits. */
11074 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11075 gcc_assert ((args_size & 0xffffffff) == args_size);
11077 if (split_stack_fn_large == NULL_RTX)
11078 split_stack_fn_large =
11079 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11081 if (ix86_cmodel == CM_LARGE_PIC)
11085 label = gen_label_rtx ();
11086 emit_label (label);
11087 LABEL_PRESERVE_P (label) = 1;
11088 emit_insn (gen_set_rip_rex64 (reg10, label));
11089 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11090 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11091 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11093 x = gen_rtx_CONST (Pmode, x);
11094 emit_move_insn (reg11, x);
11095 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11096 x = gen_const_mem (Pmode, x);
11097 emit_move_insn (reg11, x);
11100 emit_move_insn (reg11, split_stack_fn_large);
11104 argval = ((args_size << 16) << 16) + allocate;
11105 emit_move_insn (reg10, GEN_INT (argval));
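	  /* Worked example with illustrative values: for args_size
	     0x18 and allocate 0x4000 this produces
		argval = 0x0000001800004000
	     i.e. the argument size in the upper 32 bits and the frame
	     size in the lower 32 bits of %r10.  */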
11109 emit_move_insn (reg10, allocate_rtx);
11110 emit_move_insn (reg11, GEN_INT (args_size));
11111 use_reg (&call_fusage, reg11);
11114 use_reg (&call_fusage, reg10);
11118 emit_insn (gen_push (GEN_INT (args_size)));
11119 emit_insn (gen_push (allocate_rtx));
11121 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11122 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11124 add_function_usage_to (call_insn, call_fusage);
11126 /* In order to make call/return prediction work right, we now need
11127 to execute a return instruction. See
11128 libgcc/config/i386/morestack.S for the details on how this works.
11130 For flow purposes gcc must not see this as a return
11131 instruction--we need control flow to continue at the subsequent
11132 label. Therefore, we use an unspec. */
11133 gcc_assert (crtl->args.pops_args < 65536);
11134 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
11138 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11139 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11140 gen_rtx_REG (Pmode, AX_REG));
11142 /* If this function calls va_start, we need to store a pointer to
11143 the arguments on the old stack, because they may not have been
11144 all copied to the new stack. At this point the old stack can be
11145 found at the frame pointer value used by __morestack, because
11146 __morestack has set that up before calling back to us. Here we
11147 store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
11150 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11152 unsigned int scratch_regno;
11156 scratch_regno = split_stack_prologue_scratch_regno ();
11157 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11158 frame_reg = gen_rtx_REG (Pmode, BP_REG);
      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
11176 words = TARGET_64BIT ? 3 : 5;
11177 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11178 gen_rtx_PLUS (Pmode, frame_reg,
11179 GEN_INT (words * UNITS_PER_WORD))));
11181 varargs_label = gen_label_rtx ();
11182 emit_jump_insn (gen_jump (varargs_label));
11183 JUMP_LABEL (get_last_insn ()) = varargs_label;
11188 emit_label (label);
11189 LABEL_NUSES (label) = 1;
11191 /* If this function calls va_start, we now have to set the scratch
11192 register for the case where we do not call __morestack. In this
11193 case we need to set it based on the stack pointer. */
11194 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11196 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11197 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11198 GEN_INT (UNITS_PER_WORD))));
11200 emit_label (varargs_label);
11201 LABEL_NUSES (varargs_label) = 1;
11205 /* We may have to tell the dataflow pass that the split stack prologue
11206 is initializing a scratch register. */
11209 ix86_live_on_entry (bitmap regs)
11211 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11213 gcc_assert (flag_split_stack);
11214 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
/* Determine if OP is a suitable SUBREG RTX for an address.  */
11221 ix86_address_subreg_operand (rtx op)
11223 enum machine_mode mode;
11228 mode = GET_MODE (op);
11230 if (GET_MODE_CLASS (mode) != MODE_INT)
11233 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11234 failures when the register is one word out of a two word structure. */
11235 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11238 /* Allow only SUBREGs of non-eliminable hard registers. */
11239 return register_no_elim_operand (op, mode);
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing the length of the lea
   instruction.  */
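/* For example (a sketch), the SImode address

     (plus (plus (mult (reg %eax) (const_int 4))
		 (reg %ebx))
	   (const_int 12))

   i.e. 12(%ebx,%eax,4), decomposes into base %ebx, index %eax,
   scale 4 and displacement 12.  */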
11248 ix86_decompose_address (rtx addr, struct ix86_address *out)
11250 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11251 rtx base_reg, index_reg;
11252 HOST_WIDE_INT scale = 1;
11253 rtx scale_rtx = NULL_RTX;
11256 enum ix86_address_seg seg = SEG_DEFAULT;
  /* Allow zero-extended SImode addresses; they will be emitted
     with the addr32 prefix.  */
11260 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11262 if (GET_CODE (addr) == ZERO_EXTEND
11263 && GET_MODE (XEXP (addr, 0)) == SImode)
11264 addr = XEXP (addr, 0);
11265 else if (GET_CODE (addr) == AND
11266 && const_32bit_mask (XEXP (addr, 1), DImode))
11268 addr = XEXP (addr, 0);
11270 /* Strip subreg. */
11271 if (GET_CODE (addr) == SUBREG
11272 && GET_MODE (SUBREG_REG (addr)) == SImode)
11273 addr = SUBREG_REG (addr);
11279 else if (GET_CODE (addr) == SUBREG)
11281 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11286 else if (GET_CODE (addr) == PLUS)
11288 rtx addends[4], op;
11296 addends[n++] = XEXP (op, 1);
11299 while (GET_CODE (op) == PLUS);
11304 for (i = n; i >= 0; --i)
11307 switch (GET_CODE (op))
11312 index = XEXP (op, 0);
11313 scale_rtx = XEXP (op, 1);
11319 index = XEXP (op, 0);
11320 tmp = XEXP (op, 1);
11321 if (!CONST_INT_P (tmp))
11323 scale = INTVAL (tmp);
11324 if ((unsigned HOST_WIDE_INT) scale > 3)
11326 scale = 1 << scale;
11330 if (XINT (op, 1) == UNSPEC_TP
11331 && TARGET_TLS_DIRECT_SEG_REFS
11332 && seg == SEG_DEFAULT)
11333 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11339 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11366 else if (GET_CODE (addr) == MULT)
11368 index = XEXP (addr, 0); /* index*scale */
11369 scale_rtx = XEXP (addr, 1);
11371 else if (GET_CODE (addr) == ASHIFT)
11373 /* We're called for lea too, which implements ashift on occasion. */
11374 index = XEXP (addr, 0);
11375 tmp = XEXP (addr, 1);
11376 if (!CONST_INT_P (tmp))
11378 scale = INTVAL (tmp);
11379 if ((unsigned HOST_WIDE_INT) scale > 3)
11381 scale = 1 << scale;
11385 disp = addr; /* displacement */
11391 else if (GET_CODE (index) == SUBREG
11392 && ix86_address_subreg_operand (SUBREG_REG (index)))
11398 /* Extract the integral value of scale. */
11401 if (!CONST_INT_P (scale_rtx))
11403 scale = INTVAL (scale_rtx);
11406 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11407 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11409 /* Avoid useless 0 displacement. */
11410 if (disp == const0_rtx && (base || index))
  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
11414 if (base_reg && index_reg && scale == 1
11415 && (index_reg == arg_pointer_rtx
11416 || index_reg == frame_pointer_rtx
11417 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11420 tmp = base, base = index, index = tmp;
11421 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
11428 && (base_reg == hard_frame_pointer_rtx
11429 || base_reg == frame_pointer_rtx
11430 || base_reg == arg_pointer_rtx
11431 || (REG_P (base_reg)
11432 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11433 || REGNO (base_reg) == R13_REG))))
11436 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11437 Avoid this by transforming to [%esi+0].
11438 Reload calls address legitimization without cfun defined, so we need
11439 to test cfun for being non-NULL. */
11440 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11441 && base_reg && !index_reg && !disp
11442 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11445 /* Special case: encode reg+reg instead of reg*2. */
11446 if (!base && index && scale == 2)
11447 base = index, base_reg = index_reg, scale = 1;
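  /* E.g. (%eax,%eax) is preferred over (,%eax,2): a scaled index
     without a base cannot be encoded without a 32-bit displacement,
     so the base+index form is shorter.  */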
11449 /* Special case: scaling cannot be encoded without base or displacement. */
11450 if (!base && !disp && index && scale != 1)
11454 out->index = index;
11456 out->scale = scale;
11462 /* Return cost of the memory address x.
11463 For i386, it is better to use a complex address than let gcc copy
11464 the address into a reg and make a new pseudo. But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
11468 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11470 struct ix86_address parts;
11472 int ok = ix86_decompose_address (x, &parts);
11476 if (parts.base && GET_CODE (parts.base) == SUBREG)
11477 parts.base = SUBREG_REG (parts.base);
11478 if (parts.index && GET_CODE (parts.index) == SUBREG)
11479 parts.index = SUBREG_REG (parts.index);
  /* Attempt to minimize the number of registers in the address.  */
11483 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11485 && (!REG_P (parts.index)
11486 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11490 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11492 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11493 && parts.base != parts.index)
  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and decoding degenerates to vector decoded.  Increase the cost of
     such addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse such addresses
     at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the
     zero in the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */
11512 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11513 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11514 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.  */
11525 darwin_local_data_pic (rtx disp)
11527 return (GET_CODE (disp) == UNSPEC
11528 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11531 /* Determine if a given RTX is a valid constant. We already know this
11532 satisfies CONSTANT_P. */
11535 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11537 switch (GET_CODE (x))
11542 if (GET_CODE (x) == PLUS)
11544 if (!CONST_INT_P (XEXP (x, 1)))
11549 if (TARGET_MACHO && darwin_local_data_pic (x))
11552 /* Only some unspecs are valid as "constants". */
11553 if (GET_CODE (x) == UNSPEC)
11554 switch (XINT (x, 1))
11557 case UNSPEC_GOTOFF:
11558 case UNSPEC_PLTOFF:
11559 return TARGET_64BIT;
11561 case UNSPEC_NTPOFF:
11562 x = XVECEXP (x, 0, 0);
11563 return (GET_CODE (x) == SYMBOL_REF
11564 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11565 case UNSPEC_DTPOFF:
11566 x = XVECEXP (x, 0, 0);
11567 return (GET_CODE (x) == SYMBOL_REF
11568 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11573 /* We must have drilled down to a symbol. */
11574 if (GET_CODE (x) == LABEL_REF)
11576 if (GET_CODE (x) != SYMBOL_REF)
11581 /* TLS symbols are never valid. */
11582 if (SYMBOL_REF_TLS_MODEL (x))
11585 /* DLLIMPORT symbols are never valid. */
11586 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11587 && SYMBOL_REF_DLLIMPORT_P (x))
11591 /* mdynamic-no-pic */
11592 if (MACHO_DYNAMIC_NO_PIC_P)
11593 return machopic_symbol_defined_p (x);
11598 if (GET_MODE (x) == TImode
11599 && x != CONST0_RTX (TImode)
11605 if (!standard_sse_constant_p (x))
11612 /* Otherwise we handle everything else in the move patterns. */
11616 /* Determine if it's legal to put X into the constant pool. This
11617 is not possible for the address of thread-local symbols, which
11618 is checked above. */
11621 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11623 /* We can always put integral constants and vectors in memory. */
11624 switch (GET_CODE (x))
11634 return !ix86_legitimate_constant_p (mode, x);
11638 /* Nonzero if the constant value X is a legitimate general operand
11639 when generating PIC code. It is given that flag_pic is on and
11640 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11643 legitimate_pic_operand_p (rtx x)
11647 switch (GET_CODE (x))
11650 inner = XEXP (x, 0);
11651 if (GET_CODE (inner) == PLUS
11652 && CONST_INT_P (XEXP (inner, 1)))
11653 inner = XEXP (inner, 0);
11655 /* Only some unspecs are valid as "constants". */
11656 if (GET_CODE (inner) == UNSPEC)
11657 switch (XINT (inner, 1))
11660 case UNSPEC_GOTOFF:
11661 case UNSPEC_PLTOFF:
11662 return TARGET_64BIT;
11664 x = XVECEXP (inner, 0, 0);
11665 return (GET_CODE (x) == SYMBOL_REF
11666 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11667 case UNSPEC_MACHOPIC_OFFSET:
11668 return legitimate_pic_address_disp_p (x);
11676 return legitimate_pic_address_disp_p (x);
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */
11687 legitimate_pic_address_disp_p (rtx disp)
11691 /* In 64bit mode we can allow direct addresses of symbols and labels
11692 when they are not dynamic symbols. */
11695 rtx op0 = disp, op1;
11697 switch (GET_CODE (disp))
11703 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11705 op0 = XEXP (XEXP (disp, 0), 0);
11706 op1 = XEXP (XEXP (disp, 0), 1);
11707 if (!CONST_INT_P (op1)
11708 || INTVAL (op1) >= 16*1024*1024
11709 || INTVAL (op1) < -16*1024*1024)
11711 if (GET_CODE (op0) == LABEL_REF)
11713 if (GET_CODE (op0) != SYMBOL_REF)
11718 /* TLS references should always be enclosed in UNSPEC. */
11719 if (SYMBOL_REF_TLS_MODEL (op0))
11721 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11722 && ix86_cmodel != CM_LARGE_PIC)
11730 if (GET_CODE (disp) != CONST)
11732 disp = XEXP (disp, 0);
      /* It is unsafe to allow PLUS expressions; this limits the allowed
	 distance of GOT table references.  We should not need these
	 anyway.  */
11738 if (GET_CODE (disp) != UNSPEC
11739 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11740 && XINT (disp, 1) != UNSPEC_GOTOFF
11741 && XINT (disp, 1) != UNSPEC_PCREL
11742 && XINT (disp, 1) != UNSPEC_PLTOFF))
11745 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11746 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11752 if (GET_CODE (disp) == PLUS)
11754 if (!CONST_INT_P (XEXP (disp, 1)))
11756 disp = XEXP (disp, 0);
11760 if (TARGET_MACHO && darwin_local_data_pic (disp))
11763 if (GET_CODE (disp) != UNSPEC)
11766 switch (XINT (disp, 1))
      /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
11774 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11775 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11776 case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32bit relocation, we don't produce
	 it in the small PIC model at all.  */
11780 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11781 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11783 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11785 case UNSPEC_GOTTPOFF:
11786 case UNSPEC_GOTNTPOFF:
11787 case UNSPEC_INDNTPOFF:
11790 disp = XVECEXP (disp, 0, 0);
11791 return (GET_CODE (disp) == SYMBOL_REF
11792 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11793 case UNSPEC_NTPOFF:
11794 disp = XVECEXP (disp, 0, 0);
11795 return (GET_CODE (disp) == SYMBOL_REF
11796 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11797 case UNSPEC_DTPOFF:
11798 disp = XVECEXP (disp, 0, 0);
11799 return (GET_CODE (disp) == SYMBOL_REF
11800 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11806 /* Recognizes RTL expressions that are valid memory addresses for an
11807 instruction. The MODE argument is the machine mode for the MEM
11808 expression that wants to use this address.
   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */
11815 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11816 rtx addr, bool strict)
11818 struct ix86_address parts;
11819 rtx base, index, disp;
11820 HOST_WIDE_INT scale;
11822 if (ix86_decompose_address (addr, &parts) <= 0)
11823 /* Decomposition failed. */
11827 index = parts.index;
11829 scale = parts.scale;
11831 /* Validate base register. */
11838 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
11839 reg = SUBREG_REG (base);
11841 /* Base is not a register. */
11844 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
11847 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11848 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11849 /* Base is not valid. */
11853 /* Validate index register. */
11860 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
11861 reg = SUBREG_REG (index);
11863 /* Index is not a register. */
11866 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
11869 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11870 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11871 /* Index is not valid. */
11875 /* Index and base should have the same mode. */
11877 && GET_MODE (base) != GET_MODE (index))
11880 /* Validate scale factor. */
11884 /* Scale without index. */
11887 if (scale != 2 && scale != 4 && scale != 8)
11888 /* Scale is not a valid multiplier. */
11892 /* Validate displacement. */
11895 if (GET_CODE (disp) == CONST
11896 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11897 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11898 switch (XINT (XEXP (disp, 0), 1))
      /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	 when used.  While the ABI also specifies 32bit relocations, we
	 don't produce them at all and use IP-relative addressing instead.  */
11904 case UNSPEC_GOTOFF:
11905 gcc_assert (flag_pic);
11907 goto is_legitimate_pic;
11909 /* 64bit address unspec. */
11912 case UNSPEC_GOTPCREL:
11914 gcc_assert (flag_pic);
11915 goto is_legitimate_pic;
11917 case UNSPEC_GOTTPOFF:
11918 case UNSPEC_GOTNTPOFF:
11919 case UNSPEC_INDNTPOFF:
11920 case UNSPEC_NTPOFF:
11921 case UNSPEC_DTPOFF:
11924 case UNSPEC_STACK_CHECK:
11925 gcc_assert (flag_split_stack);
11929 /* Invalid address unspec. */
11933 else if (SYMBOLIC_CONST (disp)
11937 && MACHOPIC_INDIRECT
11938 && !machopic_operand_p (disp)
11944 if (TARGET_64BIT && (index || base))
11946 /* foo@dtpoff(%rX) is ok. */
11947 if (GET_CODE (disp) != CONST
11948 || GET_CODE (XEXP (disp, 0)) != PLUS
11949 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11950 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11951 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11952 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11953 /* Non-constant pic memory reference. */
11956 else if ((!TARGET_MACHO || flag_pic)
11957 && ! legitimate_pic_address_disp_p (disp))
11958 /* Displacement is an invalid pic construct. */
11961 else if (MACHO_DYNAMIC_NO_PIC_P
11962 && !ix86_legitimate_constant_p (Pmode, disp))
	   /* Displacement must be referenced via non_lazy_pointer.  */
11967 /* This code used to verify that a symbolic pic displacement
11968 includes the pic_offset_table_rtx register.
	 While this is a good idea, unfortunately these constructs may
	 be created by the "adds using lea" optimization for incorrect
	 This code is nonsensical, but results in addressing
	 GOT table with pic_offset_table_rtx base.  We can't
	 just refuse it easily, since it gets matched by the
	 "addsi3" pattern, which later gets split to lea when
	 the output register differs from the input.  While this
	 could be handled by a separate addsi pattern for this
	 case that never results in lea, disabling this test
	 seems to be the easier and correct fix for the crash.  */
11989 else if (GET_CODE (disp) != LABEL_REF
11990 && !CONST_INT_P (disp)
11991 && (GET_CODE (disp) != CONST
11992 || !ix86_legitimate_constant_p (Pmode, disp))
11993 && (GET_CODE (disp) != SYMBOL_REF
11994 || !ix86_legitimate_constant_p (Pmode, disp)))
11995 /* Displacement is not constant. */
11997 else if (TARGET_64BIT
11998 && !x86_64_immediate_operand (disp, VOIDmode))
11999 /* Displacement is out of range. */
12003 /* Everything looks valid. */
12007 /* Determine if a given RTX is a valid constant address. */
12010 constant_address_p (rtx x)
12012 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12015 /* Return a unique alias set for the GOT. */
12017 static alias_set_type
12018 ix86_GOT_alias_set (void)
12020 static alias_set_type set = -1;
12022 set = new_alias_set ();
12026 /* Return a legitimate reference for ORIG (an address) using the
12027 register REG. If REG is 0, a new pseudo is generated.
12029 There are two types of references that must be handled:
12031 1. Global data references must load the address from the GOT, via
12032 the PIC reg. An insn is emitted to do this load, and the reg is
12035 2. Static data references, constant pool addresses, and code labels
12036 compute the address as an offset from the GOT, whose base is in
12037 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12038 differentiate them from global data objects. The returned
12039 address is the PIC reg + an unspec constant.
12041 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12042 reg also appears in the address. */
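/* As an illustration (32-bit, a sketch only): for a global symbol the
   result is roughly the memory reference

	mem (pic_reg + sym@GOT)

   loaded through the GOT, while for a local symbol it is the constant
   address

	pic_reg + sym@GOTOFF

   used directly.  */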
12045 legitimize_pic_address (rtx orig, rtx reg)
12048 rtx new_rtx = orig;
12052 if (TARGET_MACHO && !TARGET_64BIT)
12055 reg = gen_reg_rtx (Pmode);
12056 /* Use the generic Mach-O PIC machinery. */
12057 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12061 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12063 else if (TARGET_64BIT
12064 && ix86_cmodel != CM_SMALL_PIC
12065 && gotoff_operand (addr, Pmode))
12068 /* This symbol may be referenced via a displacement from the PIC
12069 base address (@GOTOFF). */
12071 if (reload_in_progress)
12072 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12073 if (GET_CODE (addr) == CONST)
12074 addr = XEXP (addr, 0);
12075 if (GET_CODE (addr) == PLUS)
12077 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12079 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12082 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12083 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12085 tmpreg = gen_reg_rtx (Pmode);
12088 emit_move_insn (tmpreg, new_rtx);
12092 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12093 tmpreg, 1, OPTAB_DIRECT);
12096 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12098 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12100 /* This symbol may be referenced via a displacement from the PIC
12101 base address (@GOTOFF). */
12103 if (reload_in_progress)
12104 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12105 if (GET_CODE (addr) == CONST)
12106 addr = XEXP (addr, 0);
12107 if (GET_CODE (addr) == PLUS)
12109 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12111 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12114 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12115 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12116 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12120 emit_move_insn (reg, new_rtx);
12124 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12125 /* We can't use @GOTOFF for text labels on VxWorks;
12126 see gotoff_operand. */
12127 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12129 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12131 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12132 return legitimize_dllimport_symbol (addr, true);
12133 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12134 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12135 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12137 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12138 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
      /* For x64 PE-COFF there is no GOT table.  So we use the address
	 directly.  */
12144 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12146 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12147 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12150 reg = gen_reg_rtx (Pmode);
12151 emit_move_insn (reg, new_rtx);
12154 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12156 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12157 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12158 new_rtx = gen_const_mem (Pmode, new_rtx);
12159 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12162 reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly; otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these
	     addresses; instead we CSE addresses from the GOT table,
	     so skip this.  */
12166 emit_insn (gen_movsi (reg, new_rtx));
12171 /* This symbol must be referenced via a load from the
12172 Global Offset Table (@GOT). */
12174 if (reload_in_progress)
12175 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12176 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12177 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12179 new_rtx = force_reg (Pmode, new_rtx);
12180 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12181 new_rtx = gen_const_mem (Pmode, new_rtx);
12182 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12185 reg = gen_reg_rtx (Pmode);
12186 emit_move_insn (reg, new_rtx);
12192 if (CONST_INT_P (addr)
12193 && !x86_64_immediate_operand (addr, VOIDmode))
12197 emit_move_insn (reg, addr);
12201 new_rtx = force_reg (Pmode, addr);
12203 else if (GET_CODE (addr) == CONST)
12205 addr = XEXP (addr, 0);
12207 /* We must match stuff we generate before. Assume the only
12208 unspecs that can get here are ours. Not that we could do
12209 anything with them anyway.... */
12210 if (GET_CODE (addr) == UNSPEC
12211 || (GET_CODE (addr) == PLUS
12212 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12214 gcc_assert (GET_CODE (addr) == PLUS);
12216 if (GET_CODE (addr) == PLUS)
12218 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12220 /* Check first to see if this is a constant offset from a @GOTOFF
12221 symbol reference. */
12222 if (gotoff_operand (op0, Pmode)
12223 && CONST_INT_P (op1))
12227 if (reload_in_progress)
12228 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12229 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12231 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12232 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12233 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12237 emit_move_insn (reg, new_rtx);
12243 if (INTVAL (op1) < -16*1024*1024
12244 || INTVAL (op1) >= 16*1024*1024)
12246 if (!x86_64_immediate_operand (op1, Pmode))
12247 op1 = force_reg (Pmode, op1);
12248 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12254 base = legitimize_pic_address (XEXP (addr, 0), reg);
12255 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12256 base == reg ? NULL_RTX : reg);
12258 if (CONST_INT_P (new_rtx))
12259 new_rtx = plus_constant (base, INTVAL (new_rtx));
12262 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12264 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12265 new_rtx = XEXP (new_rtx, 1);
12267 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12275 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12278 get_thread_pointer (bool to_reg)
12280 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12282 if (GET_MODE (tp) != Pmode)
12283 tp = convert_to_mode (Pmode, tp, 1);
12286 tp = copy_addr_to_reg (tp);
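/* For reference, UNSPEC_TP materializes the thread pointer kept in a
   segment register: under the usual GNU ABIs this is roughly
   "movl %gs:0, %reg" on ia32 and "movq %fs:0, %reg" on x86-64.  */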
12291 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12293 static GTY(()) rtx ix86_tls_symbol;
12296 ix86_tls_get_addr (void)
12298 if (!ix86_tls_symbol)
12301 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12302 ? "___tls_get_addr" : "__tls_get_addr");
12304 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12307 return ix86_tls_symbol;
12310 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12312 static GTY(()) rtx ix86_tls_module_base_symbol;
12315 ix86_tls_module_base (void)
12317 if (!ix86_tls_module_base_symbol)
12319 ix86_tls_module_base_symbol
12320 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12322 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12323 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12326 return ix86_tls_module_base_symbol;
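/* For orientation before legitimize_tls_address below, rough sketches
   of what each TLS model expands to on ia32 with GNU tools (the exact
   sequences differ under -m64 and under TARGET_GNU2_TLS):

     global-dynamic:  leal  x@tlsgd(,%ebx,1), %eax
                      call  ___tls_get_addr@plt

     local-dynamic:   leal  x@tlsldm(%ebx), %eax
                      call  ___tls_get_addr@plt
                      leal  x@dtpoff(%eax), %edx

     initial-exec:    movl  x@gotntpoff(%ebx), %eax
                      movl  %gs:(%eax), %eax

     local-exec:      movl  %gs:x@ntpoff, %eax  */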
12329 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12330 false if we expect this to be used for a memory address and true if
12331 we expect to load the address into a register. */
12334 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12336 rtx dest, base, off;
12337 rtx pic = NULL_RTX, tp = NULL_RTX;
12342 case TLS_MODEL_GLOBAL_DYNAMIC:
12343 dest = gen_reg_rtx (Pmode);
12348 pic = pic_offset_table_rtx;
12351 pic = gen_reg_rtx (Pmode);
12352 emit_insn (gen_set_got (pic));
12356 if (TARGET_GNU2_TLS)
12359 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12361 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12363 tp = get_thread_pointer (true);
12364 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12366 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12370 rtx caddr = ix86_tls_get_addr ();
12374 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12377 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12378 insns = get_insns ();
12381 RTL_CONST_CALL_P (insns) = 1;
12382 emit_libcall_block (insns, dest, rax, x);
12385 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12389 case TLS_MODEL_LOCAL_DYNAMIC:
12390 base = gen_reg_rtx (Pmode);
12395 pic = pic_offset_table_rtx;
12398 pic = gen_reg_rtx (Pmode);
12399 emit_insn (gen_set_got (pic));
12403 if (TARGET_GNU2_TLS)
12405 rtx tmp = ix86_tls_module_base ();
12408 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12410 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12412 tp = get_thread_pointer (true);
12413 set_unique_reg_note (get_last_insn (), REG_EQUAL,
12414 gen_rtx_MINUS (Pmode, tmp, tp));
12418 rtx caddr = ix86_tls_get_addr ();
12422 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12425 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12426 insns = get_insns ();
12429 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12430 share the LD_BASE result with other LD model accesses. */
12431 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12432 UNSPEC_TLS_LD_BASE);
12434 RTL_CONST_CALL_P (insns) = 1;
12435 emit_libcall_block (insns, base, rax, eqv);
12438 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12441 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12442 off = gen_rtx_CONST (Pmode, off);
12444 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12446 if (TARGET_GNU2_TLS)
12448 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12450 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12454 case TLS_MODEL_INITIAL_EXEC:
12457 if (TARGET_SUN_TLS)
12459 /* The Sun linker took the AMD64 TLS spec literally
12460 and can only handle %rax as the destination of the
12461 initial-exec code sequence. */
12463 dest = gen_reg_rtx (Pmode);
12464 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12469 type = UNSPEC_GOTNTPOFF;
12473 if (reload_in_progress)
12474 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12475 pic = pic_offset_table_rtx;
12476 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12478 else if (!TARGET_ANY_GNU_TLS)
12480 pic = gen_reg_rtx (Pmode);
12481 emit_insn (gen_set_got (pic));
12482 type = UNSPEC_GOTTPOFF;
12487 type = UNSPEC_INDNTPOFF;
12490 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12491 off = gen_rtx_CONST (Pmode, off);
12493 off = gen_rtx_PLUS (Pmode, pic, off);
12494 off = gen_const_mem (Pmode, off);
12495 set_mem_alias_set (off, ix86_GOT_alias_set ());
12497 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12499 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12500 off = force_reg (Pmode, off);
12501 return gen_rtx_PLUS (Pmode, base, off);
12505 base = get_thread_pointer (true);
12506 dest = gen_reg_rtx (Pmode);
12507 emit_insn (gen_subsi3 (dest, base, off));
12511 case TLS_MODEL_LOCAL_EXEC:
12512 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12513 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12514 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12515 off = gen_rtx_CONST (Pmode, off);
12517 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12519 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12520 return gen_rtx_PLUS (Pmode, base, off);
12524 base = get_thread_pointer (true);
12525 dest = gen_reg_rtx (Pmode);
12526 emit_insn (gen_subsi3 (dest, base, off));
12531 gcc_unreachable ();
12537 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12540 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12541 htab_t dllimport_map;
12544 get_dllimport_decl (tree decl)
12546 struct tree_map *h, in;
12549 const char *prefix;
12550 size_t namelen, prefixlen;
12555 if (!dllimport_map)
12556 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12558 in.hash = htab_hash_pointer (decl);
12559 in.base.from = decl;
12560 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12561 h = (struct tree_map *) *loc;
12565 *loc = h = ggc_alloc_tree_map ();
12567 h->base.from = decl;
12568 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12569 VAR_DECL, NULL, ptr_type_node);
12570 DECL_ARTIFICIAL (to) = 1;
12571 DECL_IGNORED_P (to) = 1;
12572 DECL_EXTERNAL (to) = 1;
12573 TREE_READONLY (to) = 1;
12575 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12576 name = targetm.strip_name_encoding (name);
12577 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12578 ? "*__imp_" : "*__imp__";
12579 namelen = strlen (name);
12580 prefixlen = strlen (prefix);
12581 imp_name = (char *) alloca (namelen + prefixlen + 1);
12582 memcpy (imp_name, prefix, prefixlen);
12583 memcpy (imp_name + prefixlen, name, namelen + 1);
12585 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12586 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12587 SET_SYMBOL_REF_DECL (rtl, to);
12588 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12590 rtl = gen_const_mem (Pmode, rtl);
12591 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12593 SET_DECL_RTL (to, rtl);
12594 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
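/* As an example, on 32-bit mingw (user_label_prefix "_") a reference
   to a dllimported "foo" is rewritten into a load through the
   linker-provided import slot "__imp__foo"; a call then becomes
   roughly "call *__imp__foo".  */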
12599 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12600 true if we require the result be a register. */
12603 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12608 gcc_assert (SYMBOL_REF_DECL (symbol));
12609 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12611 x = DECL_RTL (imp_decl);
12613 x = force_reg (Pmode, x);
12617 /* Try machine-dependent ways of modifying an illegitimate address
12618 to be legitimate. If we find one, return the new, valid address.
12619 This macro is used in only one place: `memory_address' in explow.c.
12621 OLDX is the address as it was before break_out_memory_refs was called.
12622 In some cases it is useful to look at this to decide what needs to be done.
12624 It is always safe for this macro to do nothing. It exists to recognize
12625 opportunities to optimize the output.
12627 For the 80386, we handle X+REG by loading X into a register R and
12628 using R+REG. R will go in a general reg and indexing will be used.
12629 However, if REG is a broken-out memory address or multiplication,
12630 nothing needs to be done because REG can certainly go in a general reg.
12632 When -fpic is used, special handling is needed for symbolic references.
12633 See comments by legitimize_pic_address in i386.c for details. */
12636 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12637 enum machine_mode mode)
12642 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12644 return legitimize_tls_address (x, (enum tls_model) log, false);
12645 if (GET_CODE (x) == CONST
12646 && GET_CODE (XEXP (x, 0)) == PLUS
12647 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12648 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12650 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12651 (enum tls_model) log, false);
12652 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12655 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12657 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12658 return legitimize_dllimport_symbol (x, true);
12659 if (GET_CODE (x) == CONST
12660 && GET_CODE (XEXP (x, 0)) == PLUS
12661 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12662 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12664 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12665 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12669 if (flag_pic && SYMBOLIC_CONST (x))
12670 return legitimize_pic_address (x, 0);
12673 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12674 return machopic_indirect_data_reference (x, 0);
12677 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12678 if (GET_CODE (x) == ASHIFT
12679 && CONST_INT_P (XEXP (x, 1))
12680 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12683 log = INTVAL (XEXP (x, 1));
12684 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12685 GEN_INT (1 << log));
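/* E.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4)),
   which matches the scaled-index part of an x86 effective address.  */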
12688 if (GET_CODE (x) == PLUS)
12690 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12692 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12693 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12694 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12697 log = INTVAL (XEXP (XEXP (x, 0), 1));
12698 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12699 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12700 GEN_INT (1 << log));
12703 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12704 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12705 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12708 log = INTVAL (XEXP (XEXP (x, 1), 1));
12709 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12710 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12711 GEN_INT (1 << log));
12714 /* Put multiply first if it isn't already. */
12715 if (GET_CODE (XEXP (x, 1)) == MULT)
12717 rtx tmp = XEXP (x, 0);
12718 XEXP (x, 0) = XEXP (x, 1);
12723 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12724 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12725 created by virtual register instantiation, register elimination, and
12726 similar optimizations. */
12727 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12730 x = gen_rtx_PLUS (Pmode,
12731 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12732 XEXP (XEXP (x, 1), 0)),
12733 XEXP (XEXP (x, 1), 1));
12736 /* Canonicalize
12737 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12738 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12739 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12740 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12741 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12742 && CONSTANT_P (XEXP (x, 1)))
12745 rtx other = NULL_RTX;
12747 if (CONST_INT_P (XEXP (x, 1)))
12749 constant = XEXP (x, 1);
12750 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12752 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12754 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12755 other = XEXP (x, 1);
12763 x = gen_rtx_PLUS (Pmode,
12764 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12765 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12766 plus_constant (other, INTVAL (constant)));
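/* Concretely, an address such as
     (plus (plus (mult (reg A) 4) (plus (reg B) 8)) 4)
   is rewritten here into the canonical base + index*scale + disp shape
     (plus (plus (mult (reg A) 4) (reg B)) 12).  */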
12770 if (changed && ix86_legitimate_address_p (mode, x, false))
12773 if (GET_CODE (XEXP (x, 0)) == MULT)
12776 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12779 if (GET_CODE (XEXP (x, 1)) == MULT)
12782 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12786 && REG_P (XEXP (x, 1))
12787 && REG_P (XEXP (x, 0)))
12790 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12793 x = legitimize_pic_address (x, 0);
12796 if (changed && ix86_legitimate_address_p (mode, x, false))
12799 if (REG_P (XEXP (x, 0)))
12801 rtx temp = gen_reg_rtx (Pmode);
12802 rtx val = force_operand (XEXP (x, 1), temp);
12805 if (GET_MODE (val) != Pmode)
12806 val = convert_to_mode (Pmode, val, 1);
12807 emit_move_insn (temp, val);
12810 XEXP (x, 1) = temp;
12814 else if (REG_P (XEXP (x, 1)))
12816 rtx temp = gen_reg_rtx (Pmode);
12817 rtx val = force_operand (XEXP (x, 0), temp);
12820 if (GET_MODE (val) != Pmode)
12821 val = convert_to_mode (Pmode, val, 1);
12822 emit_move_insn (temp, val);
12825 XEXP (x, 0) = temp;
12833 /* Print an integer constant expression in assembler syntax. Addition
12834 and subtraction are the only arithmetic that may appear in these
12835 expressions. FILE is the stdio stream to write to, X is the rtx, and
12836 CODE is the operand print code from the output string. */
12839 output_pic_addr_const (FILE *file, rtx x, int code)
12843 switch (GET_CODE (x))
12846 gcc_assert (flag_pic);
12851 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12852 output_addr_const (file, x);
12855 const char *name = XSTR (x, 0);
12857 /* Mark the decl as referenced so that cgraph will
12858 output the function. */
12859 if (SYMBOL_REF_DECL (x))
12860 mark_decl_referenced (SYMBOL_REF_DECL (x));
12863 if (MACHOPIC_INDIRECT
12864 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12865 name = machopic_indirection_name (x, /*stub_p=*/true);
12867 assemble_name (file, name);
12869 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12870 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12871 fputs ("@PLT", file);
12878 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12879 assemble_name (asm_out_file, buf);
12883 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12887 /* This used to output parentheses around the expression,
12888 but that does not work on the 386 (either ATT or BSD assembler). */
12889 output_pic_addr_const (file, XEXP (x, 0), code);
12893 if (GET_MODE (x) == VOIDmode)
12895 /* We can use %d if the number is <32 bits and positive. */
12896 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12897 fprintf (file, "0x%lx%08lx",
12898 (unsigned long) CONST_DOUBLE_HIGH (x),
12899 (unsigned long) CONST_DOUBLE_LOW (x));
12901 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12904 /* We can't handle floating point constants;
12905 TARGET_PRINT_OPERAND must handle them. */
12906 output_operand_lossage ("floating constant misused");
12910 /* Some assemblers need integer constants to appear first. */
12911 if (CONST_INT_P (XEXP (x, 0)))
12913 output_pic_addr_const (file, XEXP (x, 0), code);
12915 output_pic_addr_const (file, XEXP (x, 1), code);
12919 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12920 output_pic_addr_const (file, XEXP (x, 1), code);
12922 output_pic_addr_const (file, XEXP (x, 0), code);
12928 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12929 output_pic_addr_const (file, XEXP (x, 0), code);
12931 output_pic_addr_const (file, XEXP (x, 1), code);
12933 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12937 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12939 bool f = i386_asm_output_addr_const_extra (file, x);
12944 gcc_assert (XVECLEN (x, 0) == 1);
12945 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12946 switch (XINT (x, 1))
12949 fputs ("@GOT", file);
12951 case UNSPEC_GOTOFF:
12952 fputs ("@GOTOFF", file);
12954 case UNSPEC_PLTOFF:
12955 fputs ("@PLTOFF", file);
12958 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12959 "(%rip)" : "[rip]", file);
12961 case UNSPEC_GOTPCREL:
12962 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12963 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12965 case UNSPEC_GOTTPOFF:
12966 /* FIXME: This might be @TPOFF in Sun ld too. */
12967 fputs ("@gottpoff", file);
12970 fputs ("@tpoff", file);
12972 case UNSPEC_NTPOFF:
12974 fputs ("@tpoff", file);
12976 fputs ("@ntpoff", file);
12978 case UNSPEC_DTPOFF:
12979 fputs ("@dtpoff", file);
12981 case UNSPEC_GOTNTPOFF:
12983 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12984 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12986 fputs ("@gotntpoff", file);
12988 case UNSPEC_INDNTPOFF:
12989 fputs ("@indntpoff", file);
12992 case UNSPEC_MACHOPIC_OFFSET:
12994 machopic_output_function_base_name (file);
12998 output_operand_lossage ("invalid UNSPEC as operand");
13004 output_operand_lossage ("invalid expression as operand");
13008 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13009 We need to emit DTP-relative relocations. */
13011 static void ATTRIBUTE_UNUSED
13012 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13014 fputs (ASM_LONG, file);
13015 output_addr_const (file, x);
13016 fputs ("@dtpoff", file);
13022 fputs (", 0", file);
13025 gcc_unreachable ();
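/* I.e. for SIZE 4 this emits ".long x@dtpoff"; for SIZE 8 it emits
   ".long x@dtpoff, 0", zero-padding the upper half of the slot on the
   assumption that the DTP offset fits in 32 bits.  */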
13029 /* Return true if X is a representation of the PIC register. This copes
13030 with calls from ix86_find_base_term, where the register might have
13031 been replaced by a cselib value. */
13034 ix86_pic_register_p (rtx x)
13036 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13037 return (pic_offset_table_rtx
13038 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13040 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13043 /* Helper function for ix86_delegitimize_address.
13044 Attempt to delegitimize TLS local-exec accesses. */
13047 ix86_delegitimize_tls_address (rtx orig_x)
13049 rtx x = orig_x, unspec;
13050 struct ix86_address addr;
13052 if (!TARGET_TLS_DIRECT_SEG_REFS)
13056 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13058 if (ix86_decompose_address (x, &addr) == 0
13059 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13060 || addr.disp == NULL_RTX
13061 || GET_CODE (addr.disp) != CONST)
13063 unspec = XEXP (addr.disp, 0);
13064 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13065 unspec = XEXP (unspec, 0);
13066 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13068 x = XVECEXP (unspec, 0, 0);
13069 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13070 if (unspec != XEXP (addr.disp, 0))
13071 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13074 rtx idx = addr.index;
13075 if (addr.scale != 1)
13076 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13077 x = gen_rtx_PLUS (Pmode, idx, x);
13080 x = gen_rtx_PLUS (Pmode, addr.base, x);
13081 if (MEM_P (orig_x))
13082 x = replace_equiv_address_nv (orig_x, x);
13086 /* In the name of slightly smaller debug output, and to cater to
13087 general assembler lossage, recognize PIC+GOTOFF and turn it back
13088 into a direct symbol reference.
13090 On Darwin, this is necessary to avoid a crash, because Darwin
13091 has a different PIC label for each routine but the DWARF debugging
13092 information is not associated with any particular routine, so it's
13093 necessary to remove references to the PIC label from RTL stored by
13094 the DWARF output code. */
13097 ix86_delegitimize_address (rtx x)
13099 rtx orig_x = delegitimize_mem_from_attrs (x);
13100 /* addend is NULL or some rtx if x is something+GOTOFF where
13101 something doesn't include the PIC register. */
13102 rtx addend = NULL_RTX;
13103 /* reg_addend is NULL or a multiple of some register. */
13104 rtx reg_addend = NULL_RTX;
13105 /* const_addend is NULL or a const_int. */
13106 rtx const_addend = NULL_RTX;
13107 /* This is the result, or NULL. */
13108 rtx result = NULL_RTX;
13117 if (GET_CODE (x) != CONST
13118 || GET_CODE (XEXP (x, 0)) != UNSPEC
13119 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13120 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13121 || !MEM_P (orig_x))
13122 return ix86_delegitimize_tls_address (orig_x);
13123 x = XVECEXP (XEXP (x, 0), 0, 0);
13124 if (GET_MODE (orig_x) != GET_MODE (x))
13126 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13134 if (GET_CODE (x) != PLUS
13135 || GET_CODE (XEXP (x, 1)) != CONST)
13136 return ix86_delegitimize_tls_address (orig_x);
13138 if (ix86_pic_register_p (XEXP (x, 0)))
13139 /* %ebx + GOT/GOTOFF */
13141 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13143 /* %ebx + %reg * scale + GOT/GOTOFF */
13144 reg_addend = XEXP (x, 0);
13145 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13146 reg_addend = XEXP (reg_addend, 1);
13147 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13148 reg_addend = XEXP (reg_addend, 0);
13151 reg_addend = NULL_RTX;
13152 addend = XEXP (x, 0);
13156 addend = XEXP (x, 0);
13158 x = XEXP (XEXP (x, 1), 0);
13159 if (GET_CODE (x) == PLUS
13160 && CONST_INT_P (XEXP (x, 1)))
13162 const_addend = XEXP (x, 1);
13166 if (GET_CODE (x) == UNSPEC
13167 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13168 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13169 result = XVECEXP (x, 0, 0);
13171 if (TARGET_MACHO && darwin_local_data_pic (x)
13172 && !MEM_P (orig_x))
13173 result = XVECEXP (x, 0, 0);
13176 return ix86_delegitimize_tls_address (orig_x);
13179 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13181 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13184 /* If the rest of the original X doesn't involve the PIC register, add
13185 addend and subtract pic_offset_table_rtx. This can happen e.g.
13187 leal (%ebx, %ecx, 4), %ecx
13189 movl foo@GOTOFF(%ecx), %edx
13190 in which case we return (%ecx - %ebx) + foo. */
13191 if (pic_offset_table_rtx)
13192 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13193 pic_offset_table_rtx),
13198 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13200 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13201 if (result == NULL_RTX)
13207 /* If X is a machine specific address (i.e. a symbol or label being
13208 referenced as a displacement from the GOT implemented using an
13209 UNSPEC), then return the base term. Otherwise return X. */
13212 ix86_find_base_term (rtx x)
13218 if (GET_CODE (x) != CONST)
13220 term = XEXP (x, 0);
13221 if (GET_CODE (term) == PLUS
13222 && (CONST_INT_P (XEXP (term, 1))
13223 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13224 term = XEXP (term, 0);
13225 if (GET_CODE (term) != UNSPEC
13226 || (XINT (term, 1) != UNSPEC_GOTPCREL
13227 && XINT (term, 1) != UNSPEC_PCREL))
13230 return XVECEXP (term, 0, 0);
13233 return ix86_delegitimize_address (x);
13237 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13238 int fp, FILE *file)
13240 const char *suffix;
13242 if (mode == CCFPmode || mode == CCFPUmode)
13244 code = ix86_fp_compare_code_to_integer (code);
13248 code = reverse_condition (code);
13299 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13303 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13304 Those same assemblers have the same but opposite lossage on cmov. */
13305 if (mode == CCmode)
13306 suffix = fp ? "nbe" : "a";
13307 else if (mode == CCCmode)
13310 gcc_unreachable ();
13326 gcc_unreachable ();
13330 gcc_assert (mode == CCmode || mode == CCCmode);
13347 gcc_unreachable ();
13351 /* ??? As above. */
13352 gcc_assert (mode == CCmode || mode == CCCmode);
13353 suffix = fp ? "nb" : "ae";
13356 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13360 /* ??? As above. */
13361 if (mode == CCmode)
13363 else if (mode == CCCmode)
13364 suffix = fp ? "nb" : "ae";
13366 gcc_unreachable ();
13369 suffix = fp ? "u" : "p";
13372 suffix = fp ? "nu" : "np";
13375 gcc_unreachable ();
13377 fputs (suffix, file);
13380 /* Print the name of register X to FILE based on its machine mode and number.
13381 If CODE is 'w', pretend the mode is HImode.
13382 If CODE is 'b', pretend the mode is QImode.
13383 If CODE is 'k', pretend the mode is SImode.
13384 If CODE is 'q', pretend the mode is DImode.
13385 If CODE is 'x', pretend the mode is V4SFmode.
13386 If CODE is 't', pretend the mode is V8SFmode.
13387 If CODE is 'h', pretend the reg is the 'high' byte register.
13388 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
13389 If CODE is 'd', duplicate the operand for an AVX instruction.
13393 print_reg (rtx x, int code, FILE *file)
13396 bool duplicated = code == 'd' && TARGET_AVX;
13398 gcc_assert (x == pc_rtx
13399 || (REGNO (x) != ARG_POINTER_REGNUM
13400 && REGNO (x) != FRAME_POINTER_REGNUM
13401 && REGNO (x) != FLAGS_REG
13402 && REGNO (x) != FPSR_REG
13403 && REGNO (x) != FPCR_REG));
13405 if (ASSEMBLER_DIALECT == ASM_ATT)
13410 gcc_assert (TARGET_64BIT);
13411 fputs ("rip", file);
13415 if (code == 'w' || MMX_REG_P (x))
13417 else if (code == 'b')
13419 else if (code == 'k')
13421 else if (code == 'q')
13423 else if (code == 'y')
13425 else if (code == 'h')
13427 else if (code == 'x')
13429 else if (code == 't')
13432 code = GET_MODE_SIZE (GET_MODE (x));
13434 /* Irritatingly, AMD extended registers use a different naming convention
13435 from the normal registers. */
13436 if (REX_INT_REG_P (x))
13438 gcc_assert (TARGET_64BIT);
13442 error ("extended registers have no high halves");
13445 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13448 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13451 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13454 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13457 error ("unsupported operand size for extended register");
13467 if (STACK_TOP_P (x))
13476 if (! ANY_FP_REG_P (x))
13477 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13482 reg = hi_reg_name[REGNO (x)];
13485 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13487 reg = qi_reg_name[REGNO (x)];
13490 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13492 reg = qi_high_reg_name[REGNO (x)];
13497 gcc_assert (!duplicated);
13499 fputs (hi_reg_name[REGNO (x)] + 1, file);
13504 gcc_unreachable ();
13510 if (ASSEMBLER_DIALECT == ASM_ATT)
13511 fprintf (file, ", %%%s", reg);
13513 fprintf (file, ", %s", reg);
13517 /* Locate some local-dynamic symbol still in use by this function
13518 so that we can print its name in some tls_local_dynamic_base pattern. */
13522 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13526 if (GET_CODE (x) == SYMBOL_REF
13527 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13529 cfun->machine->some_ld_name = XSTR (x, 0);
13536 static const char *
13537 get_some_local_dynamic_name (void)
13541 if (cfun->machine->some_ld_name)
13542 return cfun->machine->some_ld_name;
13544 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13545 if (NONDEBUG_INSN_P (insn)
13546 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13547 return cfun->machine->some_ld_name;
13552 /* Meaning of CODE:
13553 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13554 C -- print opcode suffix for set/cmov insn.
13555 c -- like C, but print reversed condition
13556 F,f -- likewise, but for floating-point.
13557 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13559 R -- print the prefix for register names.
13560 z -- print the opcode suffix for the size of the current operand.
13561 Z -- likewise, with special suffixes for x87 instructions.
13562 * -- print a star (in certain assembler syntax)
13563 A -- print an absolute memory reference.
13564 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13565 s -- print a shift double count, followed by the assembler's argument delimiter.
13567 b -- print the QImode name of the register for the indicated operand.
13568 %b0 would print %al if operands[0] is reg 0.
13569 w -- likewise, print the HImode name of the register.
13570 k -- likewise, print the SImode name of the register.
13571 q -- likewise, print the DImode name of the register.
13572 x -- likewise, print the V4SFmode name of the register.
13573 t -- likewise, print the V8SFmode name of the register.
13574 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13575 y -- print "st(0)" instead of "st" as a register.
13576 d -- print duplicated register operand for AVX instruction.
13577 D -- print condition for SSE cmp instruction.
13578 P -- if PIC, print an @PLT suffix.
13579 p -- print raw symbol name.
13580 X -- don't print any sort of PIC '@' suffix for a symbol.
13581 & -- print some in-use local-dynamic symbol name.
13582 H -- print a memory address offset by 8; used for sse high-parts
13583 Y -- print condition for XOP pcom* instruction.
13584 + -- print a branch hint as 'cs' or 'ds' prefix
13585 ; -- print a semicolon (after prefixes due to a bug in older gas).
13586 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13587 @ -- print the segment register of a thread base pointer load
13591 ix86_print_operand (FILE *file, rtx x, int code)
13598 if (ASSEMBLER_DIALECT == ASM_ATT)
13604 const char *name = get_some_local_dynamic_name ();
13606 output_operand_lossage ("'%%&' used without any "
13607 "local dynamic TLS references");
13609 assemble_name (file, name);
13614 switch (ASSEMBLER_DIALECT)
13621 /* Intel syntax. For absolute addresses, registers should not
13622 be surrounded by brackets. */
13626 ix86_print_operand (file, x, 0);
13633 gcc_unreachable ();
13636 ix86_print_operand (file, x, 0);
13641 if (ASSEMBLER_DIALECT == ASM_ATT)
13646 if (ASSEMBLER_DIALECT == ASM_ATT)
13651 if (ASSEMBLER_DIALECT == ASM_ATT)
13656 if (ASSEMBLER_DIALECT == ASM_ATT)
13661 if (ASSEMBLER_DIALECT == ASM_ATT)
13666 if (ASSEMBLER_DIALECT == ASM_ATT)
13671 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13673 /* Opcodes don't get size suffixes if using Intel opcodes. */
13674 if (ASSEMBLER_DIALECT == ASM_INTEL)
13677 switch (GET_MODE_SIZE (GET_MODE (x)))
13696 output_operand_lossage
13697 ("invalid operand size for operand code '%c'", code);
13702 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13704 (0, "non-integer operand used with operand code '%c'", code);
13708 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13709 if (ASSEMBLER_DIALECT == ASM_INTEL)
13712 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13714 switch (GET_MODE_SIZE (GET_MODE (x)))
13717 #ifdef HAVE_AS_IX86_FILDS
13727 #ifdef HAVE_AS_IX86_FILDQ
13730 fputs ("ll", file);
13738 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13740 /* 387 opcodes don't get size suffixes
13741 if the operands are registers. */
13742 if (STACK_REG_P (x))
13745 switch (GET_MODE_SIZE (GET_MODE (x)))
13766 output_operand_lossage
13767 ("invalid operand type used with operand code '%c'", code);
13771 output_operand_lossage
13772 ("invalid operand size for operand code '%c'", code);
13790 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13792 ix86_print_operand (file, x, 0);
13793 fputs (", ", file);
13798 /* A little bit of braindamage here. The SSE compare instructions
13799 use completely different names for the comparisons than the
13800 fp conditional moves do. */
13803 switch (GET_CODE (x))
13806 fputs ("eq", file);
13809 fputs ("eq_us", file);
13812 fputs ("lt", file);
13815 fputs ("nge", file);
13818 fputs ("le", file);
13821 fputs ("ngt", file);
13824 fputs ("unord", file);
13827 fputs ("neq", file);
13830 fputs ("neq_oq", file);
13833 fputs ("ge", file);
13836 fputs ("nlt", file);
13839 fputs ("gt", file);
13842 fputs ("nle", file);
13845 fputs ("ord", file);
13848 output_operand_lossage ("operand is not a condition code, "
13849 "invalid operand code 'D'");
13855 switch (GET_CODE (x))
13859 fputs ("eq", file);
13863 fputs ("lt", file);
13867 fputs ("le", file);
13870 fputs ("unord", file);
13874 fputs ("neq", file);
13878 fputs ("nlt", file);
13882 fputs ("nle", file);
13885 fputs ("ord", file);
13888 output_operand_lossage ("operand is not a condition code, "
13889 "invalid operand code 'D'");
13895 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13896 if (ASSEMBLER_DIALECT == ASM_ATT)
13898 switch (GET_MODE (x))
13900 case HImode: putc ('w', file); break;
13902 case SFmode: putc ('l', file); break;
13904 case DFmode: putc ('q', file); break;
13905 default: gcc_unreachable ();
13912 if (!COMPARISON_P (x))
13914 output_operand_lossage ("operand is neither a constant nor a "
13915 "condition code, invalid operand code "
13919 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13922 if (!COMPARISON_P (x))
13924 output_operand_lossage ("operand is neither a constant nor a "
13925 "condition code, invalid operand code "
13929 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13930 if (ASSEMBLER_DIALECT == ASM_ATT)
13933 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13936 /* Like above, but reverse condition */
13938 /* Check to see if the argument to %c is really a constant
13939 and not a condition code which needs to be reversed. */
13940 if (!COMPARISON_P (x))
13942 output_operand_lossage ("operand is neither a constant nor a "
13943 "condition code, invalid operand "
13947 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13950 if (!COMPARISON_P (x))
13952 output_operand_lossage ("operand is neither a constant nor a "
13953 "condition code, invalid operand "
13957 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13958 if (ASSEMBLER_DIALECT == ASM_ATT)
13961 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13965 /* It doesn't actually matter what mode we use here, as we're
13966 only going to use this for printing. */
13967 x = adjust_address_nv (x, DImode, 8);
13975 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13978 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13981 int pred_val = INTVAL (XEXP (x, 0));
13983 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13984 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13986 int taken = pred_val > REG_BR_PROB_BASE / 2;
13987 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13989 /* Emit hints only when the default branch prediction
13990 heuristics would fail. */
13991 if (taken != cputaken)
13993 /* We use 3e (DS) prefix for taken branches and
13994 2e (CS) prefix for not taken branches. */
13996 fputs ("ds ; ", file);
13998 fputs ("cs ; ", file);
14006 switch (GET_CODE (x))
14009 fputs ("neq", file);
14012 fputs ("eq", file);
14016 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14020 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14024 fputs ("le", file);
14028 fputs ("lt", file);
14031 fputs ("unord", file);
14034 fputs ("ord", file);
14037 fputs ("ueq", file);
14040 fputs ("nlt", file);
14043 fputs ("nle", file);
14046 fputs ("ule", file);
14049 fputs ("ult", file);
14052 fputs ("une", file);
14055 output_operand_lossage ("operand is not a condition code, "
14056 "invalid operand code 'Y'");
14062 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14068 if (ASSEMBLER_DIALECT == ASM_ATT)
14071 /* The kernel uses a different segment register for performance
14072 reasons; this way a system call does not have to trash the userspace
14073 segment register, which would be expensive. */
14074 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14075 fputs ("fs", file);
14077 fputs ("gs", file);
14081 putc (TARGET_AVX2 ? 'i' : 'f', file);
14085 output_operand_lossage ("invalid operand code '%c'", code);
14090 print_reg (x, code, file);
14092 else if (MEM_P (x))
14094 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14095 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14096 && GET_MODE (x) != BLKmode)
14099 switch (GET_MODE_SIZE (GET_MODE (x)))
14101 case 1: size = "BYTE"; break;
14102 case 2: size = "WORD"; break;
14103 case 4: size = "DWORD"; break;
14104 case 8: size = "QWORD"; break;
14105 case 12: size = "TBYTE"; break;
14107 if (GET_MODE (x) == XFmode)
14112 case 32: size = "YMMWORD"; break;
14114 gcc_unreachable ();
14117 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14120 else if (code == 'w')
14122 else if (code == 'k')
14125 fputs (size, file);
14126 fputs (" PTR ", file);
14130 /* Avoid (%rip) for call operands. */
14131 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14132 && !CONST_INT_P (x))
14133 output_addr_const (file, x);
14134 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14135 output_operand_lossage ("invalid constraints for operand");
14137 output_address (x);
14140 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14145 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14146 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14148 if (ASSEMBLER_DIALECT == ASM_ATT)
14150 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14152 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14154 fprintf (file, "0x%08x", (unsigned int) l);
14157 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14162 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14163 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14165 if (ASSEMBLER_DIALECT == ASM_ATT)
14167 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14170 /* These float cases don't actually occur as immediate operands. */
14171 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14175 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14176 fputs (dstr, file);
14181 /* We have patterns that allow zero sets of memory, for instance.
14182 In 64-bit mode, we should probably support all 8-byte vectors,
14183 since we can in fact encode that into an immediate. */
14184 if (GET_CODE (x) == CONST_VECTOR)
14186 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14190 if (code != 'P' && code != 'p')
14192 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14194 if (ASSEMBLER_DIALECT == ASM_ATT)
14197 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14198 || GET_CODE (x) == LABEL_REF)
14200 if (ASSEMBLER_DIALECT == ASM_ATT)
14203 fputs ("OFFSET FLAT:", file);
14206 if (CONST_INT_P (x))
14207 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14208 else if (flag_pic || MACHOPIC_INDIRECT)
14209 output_pic_addr_const (file, x, code);
14211 output_addr_const (file, x);
14216 ix86_print_operand_punct_valid_p (unsigned char code)
14218 return (code == '@' || code == '*' || code == '+'
14219 || code == '&' || code == ';' || code == '~');
14222 /* Print a memory operand whose address is ADDR. */
14225 ix86_print_operand_address (FILE *file, rtx addr)
14227 struct ix86_address parts;
14228 rtx base, index, disp;
14230 int ok = ix86_decompose_address (addr, &parts);
14234 if (parts.base && GET_CODE (parts.base) == SUBREG)
14236 rtx tmp = SUBREG_REG (parts.base);
14237 parts.base = simplify_subreg (GET_MODE (parts.base),
14238 tmp, GET_MODE (tmp), 0);
14241 if (parts.index && GET_CODE (parts.index) == SUBREG)
14243 rtx tmp = SUBREG_REG (parts.index);
14244 parts.index = simplify_subreg (GET_MODE (parts.index),
14245 tmp, GET_MODE (tmp), 0);
14249 index = parts.index;
14251 scale = parts.scale;
14259 if (ASSEMBLER_DIALECT == ASM_ATT)
14261 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14264 gcc_unreachable ();
14267 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14268 if (TARGET_64BIT && !base && !index)
14272 if (GET_CODE (disp) == CONST
14273 && GET_CODE (XEXP (disp, 0)) == PLUS
14274 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14275 symbol = XEXP (XEXP (disp, 0), 0);
14277 if (GET_CODE (symbol) == LABEL_REF
14278 || (GET_CODE (symbol) == SYMBOL_REF
14279 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14282 if (!base && !index)
14284 /* A displacement-only address requires special attention. */
14286 if (CONST_INT_P (disp))
14288 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14289 fputs ("ds:", file);
14290 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14293 output_pic_addr_const (file, disp, 0);
14295 output_addr_const (file, disp);
14301 /* Print SImode registers for zero-extended addresses to force
14302 addr32 prefix. Otherwise print DImode registers to avoid it. */
14304 code = ((GET_CODE (addr) == ZERO_EXTEND
14305 || GET_CODE (addr) == AND)
14309 if (ASSEMBLER_DIALECT == ASM_ATT)
14314 output_pic_addr_const (file, disp, 0);
14315 else if (GET_CODE (disp) == LABEL_REF)
14316 output_asm_label (disp);
14318 output_addr_const (file, disp);
14323 print_reg (base, code, file);
14327 print_reg (index, code, file);
14329 fprintf (file, ",%d", scale);
14335 rtx offset = NULL_RTX;
14339 /* Pull out the offset of a symbol; print any symbol itself. */
14340 if (GET_CODE (disp) == CONST
14341 && GET_CODE (XEXP (disp, 0)) == PLUS
14342 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14344 offset = XEXP (XEXP (disp, 0), 1);
14345 disp = gen_rtx_CONST (VOIDmode,
14346 XEXP (XEXP (disp, 0), 0));
14350 output_pic_addr_const (file, disp, 0);
14351 else if (GET_CODE (disp) == LABEL_REF)
14352 output_asm_label (disp);
14353 else if (CONST_INT_P (disp))
14356 output_addr_const (file, disp);
14362 print_reg (base, code, file);
14365 if (INTVAL (offset) >= 0)
14367 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14371 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14378 print_reg (index, code, file);
14380 fprintf (file, "*%d", scale);
14387 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14390 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14394 if (GET_CODE (x) != UNSPEC)
14397 op = XVECEXP (x, 0, 0);
14398 switch (XINT (x, 1))
14400 case UNSPEC_GOTTPOFF:
14401 output_addr_const (file, op);
14402 /* FIXME: This might be @TPOFF in Sun ld. */
14403 fputs ("@gottpoff", file);
14406 output_addr_const (file, op);
14407 fputs ("@tpoff", file);
14409 case UNSPEC_NTPOFF:
14410 output_addr_const (file, op);
14412 fputs ("@tpoff", file);
14414 fputs ("@ntpoff", file);
14416 case UNSPEC_DTPOFF:
14417 output_addr_const (file, op);
14418 fputs ("@dtpoff", file);
14420 case UNSPEC_GOTNTPOFF:
14421 output_addr_const (file, op);
14423 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14424 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14426 fputs ("@gotntpoff", file);
14428 case UNSPEC_INDNTPOFF:
14429 output_addr_const (file, op);
14430 fputs ("@indntpoff", file);
14433 case UNSPEC_MACHOPIC_OFFSET:
14434 output_addr_const (file, op);
14436 machopic_output_function_base_name (file);
14440 case UNSPEC_STACK_CHECK:
14444 gcc_assert (flag_split_stack);
14446 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14447 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14449 gcc_unreachable ();
14452 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14463 /* Split one or more double-mode RTL references into pairs of half-mode
14464 references. The RTL can be REG, offsettable MEM, integer constant, or
14465 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14466 split and "num" is its length. lo_half and hi_half are output arrays
14467 that parallel "operands". */
14470 split_double_mode (enum machine_mode mode, rtx operands[],
14471 int num, rtx lo_half[], rtx hi_half[])
14473 enum machine_mode half_mode;
14479 half_mode = DImode;
14482 half_mode = SImode;
14485 gcc_unreachable ();
14488 byte = GET_MODE_SIZE (half_mode);
14492 rtx op = operands[num];
14494 /* simplify_subreg refuses to split volatile memory addresses,
14495 but we still have to handle them. */
14498 lo_half[num] = adjust_address (op, half_mode, 0);
14499 hi_half[num] = adjust_address (op, half_mode, byte);
14503 lo_half[num] = simplify_gen_subreg (half_mode, op,
14504 GET_MODE (op) == VOIDmode
14505 ? mode : GET_MODE (op), 0);
14506 hi_half[num] = simplify_gen_subreg (half_mode, op,
14507 GET_MODE (op) == VOIDmode
14508 ? mode : GET_MODE (op), byte);
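/* For example, on ia32 a DImode pseudo splits into the SImode
   subwords (subreg:SI ... 0) and (subreg:SI ... 4), while an
   offsettable MEM such as 8(%esp) splits into 8(%esp) and
   12(%esp).  */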
14513 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14514 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14515 is the expression of the binary operation. The output may either be
14516 emitted here, or returned to the caller, like all output_* functions.
14518 There is no guarantee that the operands are the same mode, as they
14519 might be within FLOAT or FLOAT_EXTEND expressions. */
14521 #ifndef SYSV386_COMPAT
14522 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14523 wants to fix the assemblers because that causes incompatibility
14524 with gcc. No-one wants to fix gcc because that causes
14525 incompatibility with assemblers... You can use the option of
14526 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14527 #define SYSV386_COMPAT 1
14531 output_387_binary_op (rtx insn, rtx *operands)
14533 static char buf[40];
14536 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14538 #ifdef ENABLE_CHECKING
14539 /* Even if we do not want to check the inputs, this documents the
14540 input constraints, which helps in understanding the following code. */
14541 if (STACK_REG_P (operands[0])
14542 && ((REG_P (operands[1])
14543 && REGNO (operands[0]) == REGNO (operands[1])
14544 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14545 || (REG_P (operands[2])
14546 && REGNO (operands[0]) == REGNO (operands[2])
14547 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14548 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14551 gcc_assert (is_sse);
14554 switch (GET_CODE (operands[3]))
14557 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14558 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14566 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14567 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14575 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14576 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14584 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14585 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14593 gcc_unreachable ();
14600 strcpy (buf, ssep);
14601 if (GET_MODE (operands[0]) == SFmode)
14602 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14604 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14608 strcpy (buf, ssep + 1);
14609 if (GET_MODE (operands[0]) == SFmode)
14610 strcat (buf, "ss\t{%2, %0|%0, %2}");
14612 strcat (buf, "sd\t{%2, %0|%0, %2}");
14618 switch (GET_CODE (operands[3]))
14622 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14624 rtx temp = operands[2];
14625 operands[2] = operands[1];
14626 operands[1] = temp;
14629 /* We know operands[0] == operands[1]. */
14631 if (MEM_P (operands[2]))
14637 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14639 if (STACK_TOP_P (operands[0]))
14640 /* How is it that we are storing to a dead operand[2]?
14641 Well, presumably operands[1] is dead too. We can't
14642 store the result to st(0) as st(0) gets popped on this
14643 instruction. Instead store to operands[2] (which I
14644 think has to be st(1)). st(1) will be popped later.
14645 gcc <= 2.8.1 didn't have this check and generated
14646 assembly code that the Unixware assembler rejected. */
14647 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14649 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14653 if (STACK_TOP_P (operands[0]))
14654 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14656 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14661 if (MEM_P (operands[1]))
14667 if (MEM_P (operands[2]))
14673 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14676 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14677 derived assemblers, confusingly reverse the direction of
14678 the operation for fsub{r} and fdiv{r} when the
14679 destination register is not st(0). The Intel assembler
14680 doesn't have this brain damage. Read !SYSV386_COMPAT to
14681 figure out what the hardware really does. */
14682 if (STACK_TOP_P (operands[0]))
14683 p = "{p\t%0, %2|rp\t%2, %0}";
14685 p = "{rp\t%2, %0|p\t%0, %2}";
14687 if (STACK_TOP_P (operands[0]))
14688 /* As above for fmul/fadd, we can't store to st(0). */
14689 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14691 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14696 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14699 if (STACK_TOP_P (operands[0]))
14700 p = "{rp\t%0, %1|p\t%1, %0}";
14702 p = "{p\t%1, %0|rp\t%0, %1}";
14704 if (STACK_TOP_P (operands[0]))
14705 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14707 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14712 if (STACK_TOP_P (operands[0]))
14714 if (STACK_TOP_P (operands[1]))
14715 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14717 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14720 else if (STACK_TOP_P (operands[1]))
14723 p = "{\t%1, %0|r\t%0, %1}";
14725 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14731 p = "{r\t%2, %0|\t%0, %2}";
14733 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14739 gcc_unreachable ();
14746 /* Return needed mode for entity in optimize_mode_switching pass. */
14749 ix86_mode_needed (int entity, rtx insn)
14751 enum attr_i387_cw mode;
14753 /* The mode UNINITIALIZED is used to store the control word after a
14754 function call or ASM pattern. The mode ANY specifies that the function
14755 has no requirements on the control word and makes no changes in the
14756 bits we are interested in. */
14759 || (NONJUMP_INSN_P (insn)
14760 && (asm_noperands (PATTERN (insn)) >= 0
14761 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14762 return I387_CW_UNINITIALIZED;
14764 if (recog_memoized (insn) < 0)
14765 return I387_CW_ANY;
14767 mode = get_attr_i387_cw (insn);
14772 if (mode == I387_CW_TRUNC)
14777 if (mode == I387_CW_FLOOR)
14782 if (mode == I387_CW_CEIL)
14787 if (mode == I387_CW_MASK_PM)
14792 gcc_unreachable ();
14795 return I387_CW_ANY;
14798 /* Output code to initialize control word copies used by trunc?f?i and
14799 rounding patterns. CURRENT_MODE is set to the current control word,
14800 while NEW_MODE is set to the new control word. */
14803 emit_i387_cw_initialization (int mode)
14805 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14808 enum ix86_stack_slot slot;
14810 rtx reg = gen_reg_rtx (HImode);
14812 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14813 emit_move_insn (reg, copy_rtx (stored_mode));
14815 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14816 || optimize_function_for_size_p (cfun))
14820 case I387_CW_TRUNC:
14821 /* round toward zero (truncate) */
14822 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14823 slot = SLOT_CW_TRUNC;
14826 case I387_CW_FLOOR:
14827 /* round down toward -oo */
14828 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14829 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14830 slot = SLOT_CW_FLOOR;
14834 /* round up toward +oo */
14835 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14836 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14837 slot = SLOT_CW_CEIL;
14840 case I387_CW_MASK_PM:
14841 /* mask precision exception for nearbyint() */
14842 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14843 slot = SLOT_CW_MASK_PM;
14847 gcc_unreachable ();
14854 case I387_CW_TRUNC:
14855 /* round toward zero (truncate) */
14856 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14857 slot = SLOT_CW_TRUNC;
14860 case I387_CW_FLOOR:
14861 /* round down toward -oo */
14862 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14863 slot = SLOT_CW_FLOOR;
14867 /* round up toward +oo */
14868 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14869 slot = SLOT_CW_CEIL;
14872 case I387_CW_MASK_PM:
14873 /* mask precision exception for nearbyint() */
14874 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14875 slot = SLOT_CW_MASK_PM;
14879 gcc_unreachable ();
14883 gcc_assert (slot < MAX_386_STACK_LOCALS);
14885 new_mode = assign_386_stack_local (HImode, slot);
14886 emit_move_insn (new_mode, reg);
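/* The masks above follow the x87 control word layout: bits 10-11
   form the rounding control field (00 = to nearest, 01 = down,
   10 = up, 11 = toward zero), hence the 0x0400, 0x0800 and 0x0c00
   values, and bit 5 (0x0020) is the precision exception mask set
   for nearbyint.  */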
14889 /* Output code for INSN to convert a float to a signed int. OPERANDS
14890 are the insn operands. The output may be [HSD]Imode and the input
14891 operand may be [SDX]Fmode. */
14894 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
14896 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14897 int dimode_p = GET_MODE (operands[0]) == DImode;
14898 int round_mode = get_attr_i387_cw (insn);
14900 /* Jump through a hoop or two for DImode, since the hardware has no
14901 non-popping instruction. We used to do this a different way, but
14902 that was somewhat fragile and broke with post-reload splitters. */
14903 if ((dimode_p || fisttp) && !stack_top_dies)
14904 output_asm_insn ("fld\t%y1", operands);
14906 gcc_assert (STACK_TOP_P (operands[1]));
14907 gcc_assert (MEM_P (operands[0]));
14908 gcc_assert (GET_MODE (operands[1]) != TFmode);
14911 output_asm_insn ("fisttp%Z0\t%0", operands);
14914 if (round_mode != I387_CW_ANY)
14915 output_asm_insn ("fldcw\t%3", operands);
14916 if (stack_top_dies || dimode_p)
14917 output_asm_insn ("fistp%Z0\t%0", operands);
14919 output_asm_insn ("fist%Z0\t%0", operands);
14920 if (round_mode != I387_CW_ANY)
14921 output_asm_insn ("fldcw\t%2", operands);
14927 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14928 have the values zero or one, indicates the ffreep insn's operand
14929 from the OPERANDS array. */
14931 static const char *
14932 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14934 if (TARGET_USE_FFREEP)
14935 #ifdef HAVE_AS_IX86_FFREEP
14936 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14939 static char retval[32];
14940 int regno = REGNO (operands[opno]);
14942 gcc_assert (FP_REGNO_P (regno));
14944 regno -= FIRST_STACK_REG;
14946 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
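/* This works because the 16-bit word 0xc0df emitted via ASM_SHORT is
   stored little-endian as the bytes 0xdf 0xc0, exactly the encoding
   of "ffreep %st(0)" (0xdf 0xc0+i selects %st(i)); it lets us emit
   the insn even when the assembler lacks the mnemonic.  */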
14951 return opno ? "fstp\t%y1" : "fstp\t%y0";
14955 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14956 should be used. UNORDERED_P is true when fucom should be used. */
14959 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
14961 int stack_top_dies;
14962 rtx cmp_op0, cmp_op1;
14963 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14967 cmp_op0 = operands[0];
14968 cmp_op1 = operands[1];
14972 cmp_op0 = operands[1];
14973 cmp_op1 = operands[2];
14978 if (GET_MODE (operands[0]) == SFmode)
14980 return "%vucomiss\t{%1, %0|%0, %1}";
14982 return "%vcomiss\t{%1, %0|%0, %1}";
14985 return "%vucomisd\t{%1, %0|%0, %1}";
14987 return "%vcomisd\t{%1, %0|%0, %1}";
14990 gcc_assert (STACK_TOP_P (cmp_op0));
14992 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14994 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14996 if (stack_top_dies)
14998 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14999 return output_387_ffreep (operands, 1);
15002 return "ftst\n\tfnstsw\t%0";
15005 if (STACK_REG_P (cmp_op1)
15007 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15008 && REGNO (cmp_op1) != FIRST_STACK_REG)
15010 /* If the top of the 387 stack dies, and the other operand
15011 is also a stack register that dies, then this must be a
15012 `fcompp' float compare. */
15016 /* There is no double popping fcomi variant. Fortunately,
15017 eflags is immune from the fstp's cc clobbering. */
15019 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15021 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15022 return output_387_ffreep (operands, 0);
15027 return "fucompp\n\tfnstsw\t%0";
15029 return "fcompp\n\tfnstsw\t%0";
15034 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15036 static const char * const alt[16] =
15038 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15039 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15040 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15041 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15043 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15044 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15048 "fcomi\t{%y1, %0|%0, %y1}",
15049 "fcomip\t{%y1, %0|%0, %y1}",
15050 "fucomi\t{%y1, %0|%0, %y1}",
15051 "fucomip\t{%y1, %0|%0, %y1}",
15062 mask = eflags_p << 3;
15063 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15064 mask |= unordered_p << 1;
15065 mask |= stack_top_dies;
15067 gcc_assert (mask < 16);
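/* For instance, an ordered fcomi compare whose stack top dies gives
   mask = 8 + 1 = 9 and selects "fcomip" above, while a plain fucom
   against a live stack top gives mask = 2.  */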
15076 ix86_output_addr_vec_elt (FILE *file, int value)
15078 const char *directive = ASM_LONG;
15082 directive = ASM_QUAD;
15084 gcc_assert (!TARGET_64BIT);
15087 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
             directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
                 GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
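/* For example, clearing an SImode register when optimizing for speed
   emits the parallel above, which assembles to "xor %reg, %reg" and
   clobbers the flags, rather than the longer "mov $0, %reg".  */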
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
        {
          op1 = legitimize_tls_address (op1, model, true);
          op1 = force_operand (op1, op0);
          if (op1 == op0)
            return;
          if (GET_MODE (op1) != mode)
            op1 = convert_to_mode (mode, op1, 1);
        }
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
               && SYMBOL_REF_DLLIMPORT_P (op1))
        op1 = legitimize_dllimport_symbol (op1, false);
    }
  else if (GET_CODE (op1) == CONST
           && GET_CODE (XEXP (op1, 0)) == PLUS
           && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);
      rtx tmp = NULL;

      model = SYMBOL_REF_TLS_MODEL (symbol);
      if (model)
        tmp = legitimize_tls_address (symbol, model, true);
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
               && SYMBOL_REF_DLLIMPORT_P (symbol))
        tmp = legitimize_dllimport_symbol (symbol, true);

      if (tmp)
        {
          tmp = force_operand (tmp, NULL);
          tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
                                     op0, 1, OPTAB_DIRECT);
          if (tmp == op0)
            return;
          if (GET_MODE (tmp) != mode)
            op1 = convert_to_mode (mode, tmp, 1);
          else
            op1 = tmp;
        }
    }

  if ((flag_pic || MACHOPIC_INDIRECT)
      && symbolic_operand (op1, mode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
        {
#if TARGET_MACHO
          /* dynamic-no-pic */
          if (MACHOPIC_INDIRECT)
            {
              rtx temp = ((reload_in_progress
                           || ((op0 && REG_P (op0))
                               && mode == Pmode))
                          ? op0 : gen_reg_rtx (Pmode));
              op1 = machopic_indirect_data_reference (op1, temp);
              if (MACHOPIC_PURE)
                op1 = machopic_legitimize_pic_address (op1, mode,
                                                       temp == op1 ? 0 : temp);
            }
          if (op0 != op1 && GET_CODE (op0) != MEM)
            {
              rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
              emit_insn (insn);
              return;
            }
          if (GET_CODE (op0) == MEM)
            op1 = force_reg (Pmode, op1);
          else
            {
              rtx temp = op0;
              if (GET_CODE (temp) != REG)
                temp = gen_reg_rtx (Pmode);
              temp = legitimize_pic_address (op1, temp);
              if (temp == op0)
                return;
              op1 = temp;
            }
          /* dynamic-no-pic */
#endif
        }
      else
        {
          if (MEM_P (op0))
            op1 = force_reg (mode, op1);
          else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
            {
              rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
              op1 = legitimize_pic_address (op1, reg);
              if (op1 == op0)
                return;
              if (GET_MODE (op1) != mode)
                op1 = convert_to_mode (mode, op1, 1);
            }
        }
    }
  else
    {
      if (MEM_P (op0)
          && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
              || !push_operand (op0, mode))
          && MEM_P (op1))
        op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
          && ! general_no_elim_operand (op1, mode))
        op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
         to get them CSEed.  */
      if (can_create_pseudo_p ()
          && (mode == DImode) && TARGET_64BIT
          && immediate_operand (op1, mode)
          && !x86_64_zext_immediate_operand (op1, VOIDmode)
          && !register_operand (op0, mode)
          && optimize)
        op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
          && FLOAT_MODE_P (mode)
          && GET_CODE (op1) == CONST_DOUBLE)
        {
          /* If we are loading a floating point constant to a register,
             force the value to memory now, since we'll get better code
             out the back end.  */

          op1 = validize_mem (force_const_mem (mode, op1));
          if (!register_operand (op0, mode))
            {
              rtx temp = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
              emit_move_insn (op0, temp);
              return;
            }
        }
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
          || (GET_CODE (op1) == SUBREG
              && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
          || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
          || (GET_CODE (op1) == SUBREG
              && CONSTANT_P (SUBREG_REG (op1))))
        op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
          && !register_operand (op1, mode))
        op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
/* Split 32-byte AVX unaligned load and store if needed.  */

static void
ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
{
  rtx m;
  rtx (*extract) (rtx, rtx, rtx);
  rtx (*move_unaligned) (rtx, rtx);
  enum machine_mode mode;

  switch (GET_MODE (op0))
    {
    default:
      gcc_unreachable ();
    case V32QImode:
      extract = gen_avx_vextractf128v32qi;
      move_unaligned = gen_avx_movdqu256;
      mode = V16QImode;
      break;
    case V8SFmode:
      extract = gen_avx_vextractf128v8sf;
      move_unaligned = gen_avx_movups256;
      mode = V4SFmode;
      break;
    case V4DFmode:
      extract = gen_avx_vextractf128v4df;
      move_unaligned = gen_avx_movupd256;
      mode = V2DFmode;
      break;
    }

  if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
    {
      rtx r = gen_reg_rtx (mode);
      m = adjust_address (op1, mode, 0);
      emit_move_insn (r, m);
      m = adjust_address (op1, mode, 16);
      r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
      emit_move_insn (op0, r);
    }
  else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
    {
      m = adjust_address (op0, mode, 0);
      emit_insn (extract (m, op1, const0_rtx));
      m = adjust_address (op0, mode, 16);
      emit_insn (extract (m, op1, const1_rtx));
    }
  else
    emit_insn (move_unaligned (op0, op1));
}
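/* For example, a misaligned 32-byte V8SF load with
   TARGET_AVX256_SPLIT_UNALIGNED_LOAD set becomes a 16-byte load of the
   low half followed by a VEC_CONCAT with the high half (typically a
   vmovups plus a vinsertf128), while the corresponding store becomes
   two vextractf128 stores of the low and high halves.  */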
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         movhpd mem+8, reg
       }
 */
void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (TARGET_AVX)
    {
      switch (GET_MODE_CLASS (mode))
        {
        case MODE_VECTOR_INT:
        case MODE_INT:
          switch (GET_MODE_SIZE (mode))
            {
            case 16:
              /* If we're optimizing for size, movups is the smallest.  */
              if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
                {
                  op0 = gen_lowpart (V4SFmode, op0);
                  op1 = gen_lowpart (V4SFmode, op1);
                  emit_insn (gen_sse_movups (op0, op1));
                  break;
                }
              op0 = gen_lowpart (V16QImode, op0);
              op1 = gen_lowpart (V16QImode, op1);
              emit_insn (gen_sse2_movdqu (op0, op1));
              break;
            case 32:
              op0 = gen_lowpart (V32QImode, op0);
              op1 = gen_lowpart (V32QImode, op1);
              ix86_avx256_split_vector_move_misalign (op0, op1);
              break;
            default:
              gcc_unreachable ();
            }
          break;
        case MODE_VECTOR_FLOAT:
          op0 = gen_lowpart (mode, op0);
          op1 = gen_lowpart (mode, op1);

          switch (mode)
            {
            case V4SFmode:
              emit_insn (gen_sse_movups (op0, op1));
              break;
            case V8SFmode:
              ix86_avx256_split_vector_move_misalign (op0, op1);
              break;
            case V2DFmode:
              if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
                {
                  op0 = gen_lowpart (V4SFmode, op0);
                  op1 = gen_lowpart (V4SFmode, op1);
                  emit_insn (gen_sse_movups (op0, op1));
                  break;
                }
              emit_insn (gen_sse2_movupd (op0, op1));
              break;
            case V4DFmode:
              ix86_avx256_split_vector_move_misalign (op0, op1);
              break;
            default:
              gcc_unreachable ();
            }
          break;

        default:
          gcc_unreachable ();
        }

      return;
    }

  if (MEM_P (op1))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
          || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
        {
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
          emit_insn (gen_sse_movups (op0, op1));
          return;
        }

      /* ??? If we have typed data, then it would appear that using
         movdqu is the only way to get unaligned data loaded with
         integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          emit_insn (gen_sse2_movdqu (op0, op1));
          return;
        }

      if (TARGET_SSE2 && mode == V2DFmode)
        {
          rtx zero;

          if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
            {
              op0 = gen_lowpart (V2DFmode, op0);
              op1 = gen_lowpart (V2DFmode, op1);
              emit_insn (gen_sse2_movupd (op0, op1));
              return;
            }

          /* When SSE registers are split into halves, we can avoid
             writing to the top half twice.  */
          if (TARGET_SSE_SPLIT_REGS)
            {
              emit_clobber (op0);
              zero = op0;
            }
          else
            {
              /* ??? Not sure about the best option for the Intel chips.
                 The following would seem to satisfy; the register is
                 entirely cleared, breaking the dependency chain.  We
                 then store to the upper half, with a dependency depth
                 of one.  A rumor has it that Intel recommends two movsd
                 followed by an unpacklpd, but this is unconfirmed.  And
                 given that the dependency depth of the unpacklpd would
                 still be one, I'm not sure why this would be better.  */
              zero = CONST0_RTX (V2DFmode);
            }

          m = adjust_address (op1, DFmode, 0);
          emit_insn (gen_sse2_loadlpd (op0, zero, m));
          m = adjust_address (op1, DFmode, 8);
          emit_insn (gen_sse2_loadhpd (op0, op0, m));
        }
      else
        {
          if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
            {
              op0 = gen_lowpart (V4SFmode, op0);
              op1 = gen_lowpart (V4SFmode, op1);
              emit_insn (gen_sse_movups (op0, op1));
              return;
            }

          if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
            emit_move_insn (op0, CONST0_RTX (mode));
          else
            emit_clobber (op0);

          if (mode != V4SFmode)
            op0 = gen_lowpart (V4SFmode, op0);
          m = adjust_address (op1, V2SFmode, 0);
          emit_insn (gen_sse_loadlps (op0, op0, m));
          m = adjust_address (op1, V2SFmode, 8);
          emit_insn (gen_sse_loadhps (op0, op0, m));
        }
    }
  else if (MEM_P (op0))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
          || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
        {
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
          emit_insn (gen_sse_movups (op0, op1));
          return;
        }

      /* ??? Similar to above, only less clear because of quote
         typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
          && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          emit_insn (gen_sse2_movdqu (op0, op1));
          return;
        }

      if (TARGET_SSE2 && mode == V2DFmode)
        {
          if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
            {
              op0 = gen_lowpart (V2DFmode, op0);
              op1 = gen_lowpart (V2DFmode, op1);
              emit_insn (gen_sse2_movupd (op0, op1));
            }
          else
            {
              m = adjust_address (op0, DFmode, 0);
              emit_insn (gen_sse2_storelpd (m, op1));
              m = adjust_address (op0, DFmode, 8);
              emit_insn (gen_sse2_storehpd (m, op1));
            }
        }
      else
        {
          if (mode != V4SFmode)
            op1 = gen_lowpart (V4SFmode, op1);

          if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
            {
              op0 = gen_lowpart (V4SFmode, op0);
              emit_insn (gen_sse_movups (op0, op1));
            }
          else
            {
              m = adjust_address (op0, V2SFmode, 0);
              emit_insn (gen_sse_storelps (m, op1));
              m = adjust_address (op0, V2SFmode, 8);
              emit_insn (gen_sse_storehps (m, op1));
            }
        }
    }
  else
    gcc_unreachable ();
}
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
                             GEN_INT (-GET_MODE_SIZE (mode)),
                             stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
  emit_move_insn (tmp, x);
}
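/* For example, pushing a DFmode value this way emits roughly
   "sub $8, %esp" followed by an 8-byte store to (%esp), instead of a
   single push instruction.  */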
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
                             rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
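/* For example, for a commutative PLUS with (dst, src1, src2)
   = (%eax, mem, %eax), src2 matches dst, so this returns true; the
   caller swaps the sources and the addition becomes the destructive
   form "add mem, %eax".  */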
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp;

      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
        {
          src2 = force_reg (mode, src2);
          src1 = src2;
        }
      else
        src2 = force_reg (mode, src2);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  /* Improve address combine.  */
  if (code == PLUS
      && GET_MODE_CLASS (mode) == MODE_INT
      && MEM_P (src2))
    src2 = force_reg (mode, src2);

  operands[1] = src1;
  operands[2] = src2;

  return dst;
}
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
                                    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
                             rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else if (reload_completed
           && code == PLUS
           && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
                         rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
            && (mode == HImode
                || mode == SImode
                || (TARGET_64BIT && mode == DImode))
            && satisfies_constraint_L (src2));

  return true;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
        matching_memory = 1;
      else
        dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

void
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
                    bool signed_p)
{
  rtx end_label, qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case SImode:
      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;
      break;
    case DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
                                 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
                               gen_rtx_LABEL_REF (VOIDmode, qimode_label),
                               pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
                       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has result
         of 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
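/* Illustrative SImode sequence emitted by the splitter above
   (register names arbitrary):
        mov     dividend, scratch
        or      divisor, scratch
        test    $-0x100, scratch
        je      .Lqimode
        ...full 32-bit signed/unsigned divmod...
        jmp     .Lend
   .Lqimode:
        divb    divisor         ; AL = quotient, AH = remainder
   .Lend:  */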
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx prev, rtx next, unsigned int distance)
{
  df_ref *use_rec;
  df_ref *def_rec;

  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
    for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
      if (!DF_REF_IS_ARTIFICIAL (*def_rec)
          && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
        return distance + (distance & 1) + 2;

  return distance + 1;
}
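/* Example of the accounting above: with DISTANCE 3 (an odd number of
   half-cycles) and a true dependency between PREV and NEXT, the result
   is 3 + 1 + 2 = 6, i.e. rounded up to the next full cycle plus one
   more cycle; with no dependency it is simply 3 + 1.  */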
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

static bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
                  rtx insn)
{
  df_ref *def_rec;

  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
    if (DF_REF_REG_DEF_P (*def_rec)
        && !DF_REF_IS_ARTIFICIAL (*def_rec)
        && (regno1 == DF_REF_REGNO (*def_rec)
            || regno2 == DF_REF_REGNO (*def_rec)))
      return true;

  return false;
}

/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref *use_rec;

  for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
    if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
      return true;

  return false;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
                               rtx insn, int distance,
                               rtx start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx prev = start;
  rtx next = NULL;
  enum attr_type insn_type;

  *found = false;

  while (prev
         && prev != insn
         && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
        {
          distance = increase_distance (prev, next, distance);
          if (insn_defines_reg (regno1, regno2, prev))
            {
              insn_type = get_attr_type (prev);
              if (insn_type != TYPE_LEA)
                {
                  *found = true;
                  return distance;
                }
            }

          next = prev;
        }
      if (prev == BB_HEAD (bb))
        break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbour BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
                         rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
                                              distance, PREV_INSN (insn),
                                              &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
        if (e->src == bb)
          {
            simple_loop = true;
            break;
          }

      if (simple_loop)
        distance = distance_non_agu_define_in_bb (regno1, regno2,
                                                  insn, distance,
                                                  BB_END (bb), &found);
      else
        {
          int shortest_dist = -1;
          bool found_in_bb = false;

          FOR_EACH_EDGE (e, ei, bb->preds)
            {
              int bb_dist
                = distance_non_agu_define_in_bb (regno1, regno2,
                                                 insn, distance,
                                                 BB_END (e->src),
                                                 &found_in_bb);
              if (found_in_bb)
                {
                  if (shortest_dist < 0)
                    shortest_dist = bb_dist;
                  else if (bb_dist > 0)
                    shortest_dist = MIN (bb_dist, shortest_dist);

                  found = true;
                }
            }

          distance = shortest_dist;
        }
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  return distance >> 1;
}
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
                        rtx insn, int distance, rtx start,
                        bool *found, bool *redefined)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx next = start;
  rtx prev = NULL;

  *found = false;
  *redefined = false;

  while (next
         && next != insn
         && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
        {
          distance = increase_distance (prev, next, distance);
          if (insn_uses_reg_mem (regno, next))
            {
              /* Return DISTANCE if OP0 is used in memory
                 address in NEXT.  */
              *found = true;
              return distance;
            }

          if (insn_defines_reg (regno, INVALID_REGNUM, next))
            {
              /* Return -1 if OP0 is set in NEXT.  */
              *redefined = true;
              return -1;
            }

          prev = next;
        }

      if (next == BB_END (bb))
        break;

      next = NEXT_INSN (next);
    }

  return distance;
}
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
                                       NEXT_INSN (insn),
                                       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest == bb)
          {
            simple_loop = true;
            break;
          }

      if (simple_loop)
        distance = distance_agu_use_in_bb (regno0, insn,
                                           distance, BB_HEAD (bb),
                                           &found, &redefined);
      else
        {
          int shortest_dist = -1;
          bool found_in_bb = false;
          bool redefined_in_bb = false;

          FOR_EACH_EDGE (e, ei, bb->succs)
            {
              int bb_dist
                = distance_agu_use_in_bb (regno0, insn,
                                          distance, BB_HEAD (e->dest),
                                          &found_in_bb, &redefined_in_bb);
              if (found_in_bb)
                {
                  if (shortest_dist < 0)
                    shortest_dist = bb_dist;
                  else if (bb_dist > 0)
                    shortest_dist = MIN (bb_dist, shortest_dist);

                  found = true;
                }
            }

          distance = shortest_dist;
        }
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD
   Negative value: ADD is more preferred than LEA
   Zero: neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
                      unsigned int regno2, unsigned int split_cost)
{
  int dist_define, dist_use;

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non-AGU operand definition, no AGU
         operand usage and split cost is 0 then both lea
         and non-lea variants have same priority.  Currently
         we prefer lea for 64 bit code and non-lea on 32 bit
         code.  */
      if (dist_use < 0 && split_cost == 0)
        return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
        return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost does not exceed AGU stall.  */
  if (dist_use < 0)
    return dist_define >= LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
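/* Numeric example: a non-AGU definition one half-cycle pair away
   (dist_define = 1, below LEA_MAX_STALL) with SPLIT_COST 1 is adjusted
   to 1 + 1 + IX86_LEA_PRIORITY = 2; if the AGU use is at dist_use = 3,
   2 >= 3 is false, so the lea loses and the caller splits it.  */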
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref *use;
  bitmap live;

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
        {
          for (use = DF_INSN_USES (insn); *use; use++)
            if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
              return false;

          if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
            return true;
        }

      if (insn == BB_END (bb))
        break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out(bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}
/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  /* We need to split only adds with non-destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
}
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]) ;
  unsigned int regno1 = -1;
  unsigned int regno2 = -1;
  unsigned int split_cost = 0;
  struct ix86_address parts;
  int ok;

  /* Check we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* We should not split into add if a non-legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Compute how many cycles we will add to execution time
     if we split the lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non-destructive
         destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
        split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
        split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
        {
          if (regno0 != regno1)
            split_cost += 1;
          else if (regno2 == regno0)
            split_cost += 4;
          else
            split_cost += parts.scale;
        }

      /* Have to use add instruction with immediate if
         disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
        split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
                 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
/* Split lea instructions into a sequence of instructions
   which are executed on ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber flags register
   at lea position.  */

void
ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
{
  unsigned int regno0 = true_regnum (operands[0]) ;
  unsigned int regno1 = INVALID_REGNUM;
  unsigned int regno2 = INVALID_REGNUM;
  struct ix86_address parts;
  rtx tmp;
  int ok, adds;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  if (parts.base)
    {
      if (GET_MODE (parts.base) != mode)
        parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
      regno1 = true_regnum (parts.base);
    }

  if (parts.index)
    {
      if (GET_MODE (parts.index) != mode)
        parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
      regno2 = true_regnum (parts.index);
    }

  if (parts.scale > 1)
    {
      /* Case r1 = r1 + ...  */
      if (regno1 == regno0)
        {
          /* If we have a case r1 = r1 + C * r1 then we
             should use multiplication which is very
             expensive.  Assume cost model is wrong if we
             have such case here.  */
          gcc_assert (regno2 != regno0);

          for (adds = parts.scale; adds > 0; adds--)
            ix86_emit_binop (PLUS, mode, operands[0], parts.index);
        }
      else
        {
          /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
          if (regno0 != regno2)
            emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));

          /* Use shift for scaling.  */
          ix86_emit_binop (ASHIFT, mode, operands[0],
                           GEN_INT (exact_log2 (parts.scale)));

          if (parts.base)
            ix86_emit_binop (PLUS, mode, operands[0], parts.base);

          if (parts.disp && parts.disp != const0_rtx)
            ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
        }
    }
  else if (!parts.base && !parts.index)
    {
      gcc_assert(parts.disp);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
    }
  else
    {
      if (!parts.base)
        {
          if (regno0 != regno2)
            emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
        }
      else if (!parts.index)
        {
          if (regno0 != regno1)
            emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
        }
      else
        {
          if (regno0 == regno1)
            tmp = parts.index;
          else if (regno0 == regno2)
            tmp = parts.base;
          else
            {
              emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
              tmp = parts.index;
            }

          ix86_emit_binop (PLUS, mode, operands[0], tmp);
        }

      if (parts.disp && parts.disp != const0_rtx)
        ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
    }
}
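/* For example (illustrative registers), splitting
        lea 0x4(%ebx,%ecx,4), %eax
   with this function yields
        mov %ecx, %eax
        shl $2, %eax
        add %ebx, %eax
        add $4, %eax  */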
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
        return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
        if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
                                          use_body))
          return true;
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
        if (ix86_dep_by_shift_count_body (set_body,
                                          XVECEXP (use_body, 0, i)))
          return true;
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
          || GET_CODE (shift_rtx) == LSHIFTRT
          || GET_CODE (shift_rtx) == ASHIFTRT
          || GET_CODE (shift_rtx) == ROTATE
          || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count)
          && true_regnum (set_dest) == true_regnum (shift_count))
        return true;
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
                                       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
        emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
        emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
        emit_insn (gen_sse_movss (value, value, input));
      else
        emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_sse2_cvttps2dq (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
                            gen_rtvec (4, GEN_INT (0x43300000UL),
                                       GEN_INT (0x45300000UL),
                                       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
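/* Worked example of the bias trick: for input 2**32 + 5 (lo = 5,
   hi = 1), the two doubles built above are (0x1.0p52 + 5) and
   (0x1.0p84 + 1 * 0x1.0p32); subtracting the biases leaves 5.0 and
   0x1.0p32, whose sum is the exact DFmode value 2**32 + 5.  */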
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
                                  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
                           NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
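/* The arithmetic above computes double ((int) (input - 2**31)) + 2**31.0.
   E.g. for input 0xffffffff the PLUS wraps to 0x7fffffff, which converts
   exactly through the signed conversion, and adding 2**31.0 restores
   4294967295.0.  */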
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
                           0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
                                NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
                                NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
                               0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
                               0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
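/* The result computed above is float (input >> 16) * 0x1.0p16
   + float (input & 0xffff); both halves are below 2**16 and therefore
   convert to SFmode exactly, so only the final addition can round.  */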
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  enum machine_mode scalar_mode;

  switch (mode)
    {
    case V32QImode:
    case V16QImode:
    case V16HImode:
    case V8HImode:
    case V8SImode:
    case V4SImode:
    case V4DImode:
    case V2DImode:
      gcc_assert (vect);
    case V8SFmode:
    case V4SFmode:
    case V4DFmode:
    case V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
        RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V8SImode:
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V4DImode:
    case V2DImode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
        lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        {
          imode = TImode;
          lo = 0, hi = (HOST_WIDE_INT)1 << shift;
        }
      else
        {
          rtvec vec;

          imode = DImode;
          lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

          if (invert)
            {
              lo = ~lo, hi = ~hi;
              v = constm1_rtx;
            }
          else
            v = const0_rtx;

          mask = immed_double_const (lo, hi, imode);

          vec = gen_rtvec (2, v, mask);
          v = gen_rtx_CONST_VECTOR (V2DImode, vec);
          v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

          return v;
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
                                rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
        par = gen_rtvec (2, set, use);
      else
        {
          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
          par = gen_rtvec (3, set, use, clob);
        }
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
        op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
        {
          if (op0 == CONST0_RTX (mode))
            op0 = CONST0_RTX (vmode);
          else
            {
              rtx v = ix86_build_const_vector (vmode, false, op0);

              op0 = force_reg (vmode, v);
            }
        }
      else if (op0 != CONST0_RTX (mode))
        op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_const;
      else
        copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_var;
      else
        copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
         we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))     /* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))               /* alternative 1,3 */
        {
          x = gen_rtx_AND (vmode, scratch, mask);
        }
      else                                              /* alternative 2,4 */
        {
          gcc_assert (REGNO (mask) == REGNO (scratch));
          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
          x = gen_rtx_AND (vmode, scratch, op1);
        }
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))                  /* alternative 1,2 */
        {
          dest = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, nmask);
        }
      else                                              /* alternative 3,4 */
        {
          gcc_assert (REGNO (nmask) == REGNO (dest));
          dest = nmask;
          op0 = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, op0);
        }
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
      if (set_mode != req_mode)
        return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

static enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
17449 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17451 enum machine_mode mode = GET_MODE (op0);
17453 if (SCALAR_FLOAT_MODE_P (mode))
17455 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17456 return ix86_fp_compare_mode (code);
17461 /* Only zero flag is needed. */
17462 case EQ: /* ZF=0 */
17463 case NE: /* ZF!=0 */
17465 /* Codes needing carry flag. */
17466 case GEU: /* CF=0 */
17467 case LTU: /* CF=1 */
17468 /* Detect overflow checks. They need just the carry flag. */
17469 if (GET_CODE (op0) == PLUS
17470 && rtx_equal_p (op1, XEXP (op0, 0)))
17474 case GTU: /* CF=0 & ZF=0 */
17475 case LEU: /* CF=1 | ZF=1 */
17476 /* Detect overflow checks. They need just the carry flag. */
17477 if (GET_CODE (op0) == MINUS
17478 && rtx_equal_p (op1, XEXP (op0, 0)))
17482 /* Codes possibly doable only with sign flag when
17483 comparing against zero. */
17484 case GE: /* SF=OF or SF=0 */
17485 case LT: /* SF<>OF or SF=1 */
17486 if (op1 == const0_rtx)
17489 /* For other cases the carry flag is not required. */
17491 /* Codes doable only with the sign flag when comparing
17492 against zero, but for which we miss the jump instruction,
17493 so we need to use relational tests against overflow,
17494 which thus needs to be zero. */
17495 case GT: /* ZF=0 & SF=OF */
17496 case LE: /* ZF=1 | SF<>OF */
17497 if (op1 == const0_rtx)
17501 /* strcmp patterns do (use flags), and combine may ask us for a proper mode. */
17506 gcc_unreachable ();
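/* Illustrative sketch (not GCC source): the overflow-check shape the cases
   above recognize.  An unsigned add overflows exactly when the truncated
   sum compares LTU against an addend, so only the carry flag is needed.  */
#include <assert.h>
#include <stdint.h>

static int add_overflows_u32 (uint32_t a, uint32_t b)
{
  return a + b < a;	/* CF of the addition, read back via LTU */
}

static void cc_mode_example (void)
{
  assert (add_overflows_u32 (0xffffffffu, 1u));
  assert (!add_overflows_u32 (1u, 2u));
}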
17510 /* Return the fixed registers used for condition codes. */
17513 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17520 /* If two condition code modes are compatible, return a condition code
17521 mode which is compatible with both. Otherwise, return VOIDmode. */
17524 static enum machine_mode
17525 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17530 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17533 if ((m1 == CCGCmode && m2 == CCGOCmode)
17534 || (m1 == CCGOCmode && m2 == CCGCmode))
17540 gcc_unreachable ();
17570 /* These are only compatible with themselves, which we already checked. */
17577 /* Return a comparison we can do that is equivalent to
17578 swap_condition (code), apart possibly from orderedness.
17579 But never change orderedness if TARGET_IEEE_FP, returning
17580 UNKNOWN in that case if necessary. */
17582 static enum rtx_code
17583 ix86_fp_swap_condition (enum rtx_code code)
17587 case GT: /* GTU - CF=0 & ZF=0 */
17588 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17589 case GE: /* GEU - CF=0 */
17590 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17591 case UNLT: /* LTU - CF=1 */
17592 return TARGET_IEEE_FP ? UNKNOWN : GT;
17593 case UNLE: /* LEU - CF=1 | ZF=1 */
17594 return TARGET_IEEE_FP ? UNKNOWN : GE;
17596 return swap_condition (code);
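/* Illustrative sketch (not GCC source): why the swapped comparison above is
   equivalent "apart possibly from orderedness".  UNLT (x, y) -- x < y or
   unordered -- can be modeled as !(x >= y); it agrees with GT whenever both
   operands are ordered and differs only on NaNs, which is exactly the case
   TARGET_IEEE_FP refuses by returning UNKNOWN.  */
#include <assert.h>
#include <math.h>

static int unlt (double x, double y)
{
  return !(x >= y);	/* >= is an ordered compare, so NaN makes this true */
}

static void fp_swap_example (void)
{
  assert ((2.0 > 1.0) == unlt (1.0, 2.0));	/* ordered: GT == swapped UNLT */
  assert (!(NAN > 1.0) && unlt (1.0, NAN));	/* unordered: they disagree */
}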
17600 /* Return cost of comparison CODE using the best strategy for performance.
17601 All following functions use the number of instructions as a cost metric.
17602 In the future this should be tweaked to compute bytes for optimize_size and
17603 take into account the performance of various instructions on various CPUs. */
17606 ix86_fp_comparison_cost (enum rtx_code code)
17610 /* The cost of code using bit-twiddling on %ah. */
17627 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17631 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17634 gcc_unreachable ();
17637 switch (ix86_fp_comparison_strategy (code))
17639 case IX86_FPCMP_COMI:
17640 return arith_cost > 4 ? 3 : 2;
17641 case IX86_FPCMP_SAHF:
17642 return arith_cost > 4 ? 4 : 3;
17648 /* Return the strategy to use for a floating-point comparison. We assume
17649 that fcomi is always preferable where available, since that is also
17650 true when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17652 enum ix86_fpcmp_strategy
17653 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17655 /* Do fcomi/sahf based test when profitable. */
17658 return IX86_FPCMP_COMI;
17660 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17661 return IX86_FPCMP_SAHF;
17663 return IX86_FPCMP_ARITH;
17666 /* Swap, force into registers, or otherwise massage the two operands
17667 to a fp comparison. The operands are updated in place; the new
17668 comparison code is returned. */
17670 static enum rtx_code
17671 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17673 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17674 rtx op0 = *pop0, op1 = *pop1;
17675 enum machine_mode op_mode = GET_MODE (op0);
17676 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17678 /* All of the unordered compare instructions only work on registers.
17679 The same is true of the fcomi compare instructions. The XFmode
17680 compare instructions require registers except when comparing
17681 against zero or when converting operand 1 from fixed point to floating point. */
17685 && (fpcmp_mode == CCFPUmode
17686 || (op_mode == XFmode
17687 && ! (standard_80387_constant_p (op0) == 1
17688 || standard_80387_constant_p (op1) == 1)
17689 && GET_CODE (op1) != FLOAT)
17690 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17692 op0 = force_reg (op_mode, op0);
17693 op1 = force_reg (op_mode, op1);
17697 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17698 things around if they appear profitable, otherwise force op0
17699 into a register. */
17701 if (standard_80387_constant_p (op0) == 0
17703 && ! (standard_80387_constant_p (op1) == 0
17706 enum rtx_code new_code = ix86_fp_swap_condition (code);
17707 if (new_code != UNKNOWN)
17710 tmp = op0, op0 = op1, op1 = tmp;
17716 op0 = force_reg (op_mode, op0);
17718 if (CONSTANT_P (op1))
17720 int tmp = standard_80387_constant_p (op1);
17722 op1 = validize_mem (force_const_mem (op_mode, op1));
17726 op1 = force_reg (op_mode, op1);
17729 op1 = force_reg (op_mode, op1);
17733 /* Try to rearrange the comparison to make it cheaper. */
17734 if (ix86_fp_comparison_cost (code)
17735 > ix86_fp_comparison_cost (swap_condition (code))
17736 && (REG_P (op1) || can_create_pseudo_p ()))
17739 tmp = op0, op0 = op1, op1 = tmp;
17740 code = swap_condition (code);
17742 op0 = force_reg (op_mode, op0);
17750 /* Convert comparison codes we use to represent FP comparison to integer
17751 code that will result in a proper branch. Return UNKNOWN if no such code is available. */
17755 ix86_fp_compare_code_to_integer (enum rtx_code code)
17784 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17787 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17789 enum machine_mode fpcmp_mode, intcmp_mode;
17792 fpcmp_mode = ix86_fp_compare_mode (code);
17793 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17795 /* Do fcomi/sahf based test when profitable. */
17796 switch (ix86_fp_comparison_strategy (code))
17798 case IX86_FPCMP_COMI:
17799 intcmp_mode = fpcmp_mode;
17800 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17801 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17806 case IX86_FPCMP_SAHF:
17807 intcmp_mode = fpcmp_mode;
17808 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17809 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17813 scratch = gen_reg_rtx (HImode);
17814 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17815 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17818 case IX86_FPCMP_ARITH:
17819 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17820 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17821 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17823 scratch = gen_reg_rtx (HImode);
17824 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17826 /* In the unordered case, we have to check C2 for NaNs, which
17827 doesn't work out to anything nice combination-wise.
17828 So do some bit twiddling on the value we've got in AH to come
17829 up with an appropriate set of condition codes. */
17831 intcmp_mode = CCNOmode;
17836 if (code == GT || !TARGET_IEEE_FP)
17838 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17843 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17844 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17845 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17846 intcmp_mode = CCmode;
17852 if (code == LT && TARGET_IEEE_FP)
17854 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17855 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17856 intcmp_mode = CCmode;
17861 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17867 if (code == GE || !TARGET_IEEE_FP)
17869 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17874 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17875 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17881 if (code == LE && TARGET_IEEE_FP)
17883 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17884 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17885 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17886 intcmp_mode = CCmode;
17891 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17897 if (code == EQ && TARGET_IEEE_FP)
17899 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17900 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17901 intcmp_mode = CCmode;
17906 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17912 if (code == NE && TARGET_IEEE_FP)
17914 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17915 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17921 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17927 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17931 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17936 gcc_unreachable ();
17944 /* Return the test that should be put into the flags user, i.e.
17945 the bcc, scc, or cmov instruction. */
17946 return gen_rtx_fmt_ee (code, VOIDmode,
17947 gen_rtx_REG (intcmp_mode, FLAGS_REG),
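/* Illustrative sketch (not GCC source): the IX86_FPCMP_ARITH masks used
   above.  After fnstsw, AH holds C0 = 0x01, C2 = 0x04 and C3 = 0x40, and
   fcom sets a>b: none, a<b: C0, a==b: C3, unordered: C0|C2|C3.  Hence
   "test $0x45, %ah" yields ZF=1 exactly for ordered greater-than.  */
#include <assert.h>
#include <math.h>

static unsigned fcom_ah_model (double a, double b)
{
  if (a > b)  return 0x00;
  if (a < b)  return 0x01;	/* C0 */
  if (a == b) return 0x40;	/* C3 */
  return 0x45;			/* unordered: C0|C2|C3 */
}

static void fnstsw_example (void)
{
  assert ((fcom_ah_model (2.0, 1.0) & 0x45) == 0);
  assert ((fcom_ah_model (1.0, 2.0) & 0x45) != 0);
  assert ((fcom_ah_model (NAN, 1.0) & 0x45) == 0x45);
}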
17952 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17956 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17957 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17959 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17961 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17962 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17965 ret = ix86_expand_int_compare (code, op0, op1);
17971 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17973 enum machine_mode mode = GET_MODE (op0);
17985 tmp = ix86_expand_compare (code, op0, op1);
17986 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17987 gen_rtx_LABEL_REF (VOIDmode, label),
17989 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17996 /* Expand a double-word (DImode/TImode) branch into multiple compare+branch. */
17998 rtx lo[2], hi[2], label2;
17999 enum rtx_code code1, code2, code3;
18000 enum machine_mode submode;
18002 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
18004 tmp = op0, op0 = op1, op1 = tmp;
18005 code = swap_condition (code);
18008 split_double_mode (mode, &op0, 1, lo+0, hi+0);
18009 split_double_mode (mode, &op1, 1, lo+1, hi+1);
18011 submode = mode == DImode ? SImode : DImode;
18013 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18014 avoid two branches. This costs one extra insn, so disable when
18015 optimizing for size. */
18017 if ((code == EQ || code == NE)
18018 && (!optimize_insn_for_size_p ()
18019 || hi[1] == const0_rtx || lo[1] == const0_rtx))
18024 if (hi[1] != const0_rtx)
18025 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
18026 NULL_RTX, 0, OPTAB_WIDEN);
18029 if (lo[1] != const0_rtx)
18030 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
18031 NULL_RTX, 0, OPTAB_WIDEN);
18033 tmp = expand_binop (submode, ior_optab, xor1, xor0,
18034 NULL_RTX, 0, OPTAB_WIDEN);
18036 ix86_expand_branch (code, tmp, const0_rtx, label);
18040 /* Otherwise, if we are doing less-than or greater-or-equal-than,
18041 and op1 is a constant whose low word is zero, then we can just
18042 examine the high word. Similarly for a low word of -1 and
18043 less-or-equal-than or greater-than. */
18045 if (CONST_INT_P (hi[1]))
18048 case LT: case LTU: case GE: case GEU:
18049 if (lo[1] == const0_rtx)
18051 ix86_expand_branch (code, hi[0], hi[1], label);
18055 case LE: case LEU: case GT: case GTU:
18056 if (lo[1] == constm1_rtx)
18058 ix86_expand_branch (code, hi[0], hi[1], label);
18066 /* Otherwise, we need two or three jumps. */
18068 label2 = gen_label_rtx ();
18071 code2 = swap_condition (code);
18072 code3 = unsigned_condition (code);
18076 case LT: case GT: case LTU: case GTU:
18079 case LE: code1 = LT; code2 = GT; break;
18080 case GE: code1 = GT; code2 = LT; break;
18081 case LEU: code1 = LTU; code2 = GTU; break;
18082 case GEU: code1 = GTU; code2 = LTU; break;
18084 case EQ: code1 = UNKNOWN; code2 = NE; break;
18085 case NE: code2 = UNKNOWN; break;
18088 gcc_unreachable ();
18093 * if (hi(a) < hi(b)) goto true;
18094 * if (hi(a) > hi(b)) goto false;
18095 * if (lo(a) < lo(b)) goto true;
18099 if (code1 != UNKNOWN)
18100 ix86_expand_branch (code1, hi[0], hi[1], label);
18101 if (code2 != UNKNOWN)
18102 ix86_expand_branch (code2, hi[0], hi[1], label2);
18104 ix86_expand_branch (code3, lo[0], lo[1], label);
18106 if (code2 != UNKNOWN)
18107 emit_label (label2);
18112 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
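/* Illustrative sketch (not GCC source): the double-word branch splitting
   above, shown on a 64-bit value held in two 32-bit halves.  Equality uses
   (hi0^hi1)|(lo0^lo1); ordering branches on the high words first (signed)
   and decides ties with an unsigned compare of the low words.  */
#include <assert.h>
#include <stdint.h>

static int lt_s64_split (int32_t hi0, uint32_t lo0, int32_t hi1, uint32_t lo1)
{
  if (hi0 < hi1)	/* code1: LT on high words -> taken */
    return 1;
  if (hi0 > hi1)	/* code2: GT on high words -> not taken */
    return 0;
  return lo0 < lo1;	/* code3: LTU on low words */
}

static void split_branch_example (void)
{
  int64_t a = -5, b = 3;
  assert (lt_s64_split ((int32_t) (a >> 32), (uint32_t) a,
			(int32_t) (b >> 32), (uint32_t) b) == (a < b));
}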
18117 /* Split branch based on floating point condition. */
18119 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18120 rtx target1, rtx target2, rtx tmp, rtx pushed)
18125 if (target2 != pc_rtx)
18128 code = reverse_condition_maybe_unordered (code);
18133 condition = ix86_expand_fp_compare (code, op1, op2,
18136 /* Remove pushed operand from stack. */
18138 ix86_free_from_memory (GET_MODE (pushed));
18140 i = emit_jump_insn (gen_rtx_SET
18142 gen_rtx_IF_THEN_ELSE (VOIDmode,
18143 condition, target1, target2)));
18144 if (split_branch_probability >= 0)
18145 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
18149 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18153 gcc_assert (GET_MODE (dest) == QImode);
18155 ret = ix86_expand_compare (code, op0, op1);
18156 PUT_MODE (ret, QImode);
18157 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
18160 /* Expand comparison setting or clearing carry flag. Return true when
18161 successful and set pop for the operation. */
18163 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18165 enum machine_mode mode =
18166 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18168 /* Do not handle double-mode compares that go through a special path. */
18169 if (mode == (TARGET_64BIT ? TImode : DImode))
18172 if (SCALAR_FLOAT_MODE_P (mode))
18174 rtx compare_op, compare_seq;
18176 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18178 /* Shortcut: following common codes never translate
18179 into carry flag compares. */
18180 if (code == EQ || code == NE || code == UNEQ || code == LTGT
18181 || code == ORDERED || code == UNORDERED)
18184 /* These comparisons require the zero flag; swap operands so they don't. */
18185 if ((code == GT || code == UNLE || code == LE || code == UNGT)
18186 && !TARGET_IEEE_FP)
18191 code = swap_condition (code);
18194 /* Try to expand the comparison and verify that we end up with
18195 a carry flag based comparison. This fails to be true only when
18196 we decide to expand the comparison using arithmetic, which is
18197 not a too common scenario. */
18199 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18200 compare_seq = get_insns ();
18203 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18204 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18205 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18207 code = GET_CODE (compare_op);
18209 if (code != LTU && code != GEU)
18212 emit_insn (compare_seq);
18217 if (!INTEGRAL_MODE_P (mode))
18226 /* Convert a==0 into (unsigned)a<1. */
18229 if (op1 != const0_rtx)
18232 code = (code == EQ ? LTU : GEU);
18235 /* Convert a>b into b<a or a>=b+1. */
18238 if (CONST_INT_P (op1))
18240 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18241 /* Bail out on overflow. We can still swap operands, but that
18242 would force loading of the constant into a register. */
18243 if (op1 == const0_rtx
18244 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18246 code = (code == GTU ? GEU : LTU);
18253 code = (code == GTU ? LTU : GEU);
18257 /* Convert a>=0 into (unsigned)a<0x80000000. */
18260 if (mode == DImode || op1 != const0_rtx)
18262 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18263 code = (code == LT ? GEU : LTU);
18267 if (mode == DImode || op1 != constm1_rtx)
18269 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18270 code = (code == LE ? GEU : LTU);
18276 /* Swapping operands may cause a constant to appear as the first operand. */
18277 if (!nonimmediate_operand (op0, VOIDmode))
18279 if (!can_create_pseudo_p ())
18281 op0 = force_reg (mode, op0);
18283 *pop = ix86_expand_compare (code, op0, op1);
18284 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
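/* Illustrative sketch (not GCC source): the integer rewrites above that
   reduce a comparison to the carry-flag codes LTU/GEU.  */
#include <assert.h>
#include <stdint.h>

static void carry_flag_identities (uint32_t a, uint32_t b)
{
  assert ((a == 0) == (a < 1u));		/* EQ   -> LTU */
  if (b != UINT32_MAX)				/* the code bails out on
						   constant overflow */
    assert ((a > b) == (a >= b + 1u));		/* GTU  -> GEU */
  assert (((int32_t) a >= 0)
	  == (a < 0x80000000u));		/* GE 0 -> LTU */
}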
18289 ix86_expand_int_movcc (rtx operands[])
18291 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18292 rtx compare_seq, compare_op;
18293 enum machine_mode mode = GET_MODE (operands[0]);
18294 bool sign_bit_compare_p = false;
18295 rtx op0 = XEXP (operands[1], 0);
18296 rtx op1 = XEXP (operands[1], 1);
18299 compare_op = ix86_expand_compare (code, op0, op1);
18300 compare_seq = get_insns ();
18303 compare_code = GET_CODE (compare_op);
18305 if ((op1 == const0_rtx && (code == GE || code == LT))
18306 || (op1 == constm1_rtx && (code == GT || code == LE)))
18307 sign_bit_compare_p = true;
18309 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18310 HImode insns, we'd be swallowed in word prefix ops. */
18312 if ((mode != HImode || TARGET_FAST_PREFIX)
18313 && (mode != (TARGET_64BIT ? TImode : DImode))
18314 && CONST_INT_P (operands[2])
18315 && CONST_INT_P (operands[3]))
18317 rtx out = operands[0];
18318 HOST_WIDE_INT ct = INTVAL (operands[2]);
18319 HOST_WIDE_INT cf = INTVAL (operands[3]);
18320 HOST_WIDE_INT diff;
18323 /* Sign bit compares are better done using shifts than by using sbb. */
18325 if (sign_bit_compare_p
18326 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18328 /* Detect overlap between destination and compare sources. */
18331 if (!sign_bit_compare_p)
18334 bool fpcmp = false;
18336 compare_code = GET_CODE (compare_op);
18338 flags = XEXP (compare_op, 0);
18340 if (GET_MODE (flags) == CCFPmode
18341 || GET_MODE (flags) == CCFPUmode)
18345 = ix86_fp_compare_code_to_integer (compare_code);
18348 /* To simplify the rest of the code, restrict to the GEU case. */
18349 if (compare_code == LTU)
18351 HOST_WIDE_INT tmp = ct;
18354 compare_code = reverse_condition (compare_code);
18355 code = reverse_condition (code);
18360 PUT_CODE (compare_op,
18361 reverse_condition_maybe_unordered
18362 (GET_CODE (compare_op)));
18364 PUT_CODE (compare_op,
18365 reverse_condition (GET_CODE (compare_op)));
18369 if (reg_overlap_mentioned_p (out, op0)
18370 || reg_overlap_mentioned_p (out, op1))
18371 tmp = gen_reg_rtx (mode);
18373 if (mode == DImode)
18374 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18376 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18377 flags, compare_op));
18381 if (code == GT || code == GE)
18382 code = reverse_condition (code);
18385 HOST_WIDE_INT tmp = ct;
18390 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18403 tmp = expand_simple_binop (mode, PLUS,
18405 copy_rtx (tmp), 1, OPTAB_DIRECT);
18416 tmp = expand_simple_binop (mode, IOR,
18418 copy_rtx (tmp), 1, OPTAB_DIRECT);
18420 else if (diff == -1 && ct)
18430 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18432 tmp = expand_simple_binop (mode, PLUS,
18433 copy_rtx (tmp), GEN_INT (cf),
18434 copy_rtx (tmp), 1, OPTAB_DIRECT);
18442 * andl cf - ct, dest
18452 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18455 tmp = expand_simple_binop (mode, AND,
18457 gen_int_mode (cf - ct, mode),
18458 copy_rtx (tmp), 1, OPTAB_DIRECT);
18460 tmp = expand_simple_binop (mode, PLUS,
18461 copy_rtx (tmp), GEN_INT (ct),
18462 copy_rtx (tmp), 1, OPTAB_DIRECT);
18465 if (!rtx_equal_p (tmp, out))
18466 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
18473 enum machine_mode cmp_mode = GET_MODE (op0);
18476 tmp = ct, ct = cf, cf = tmp;
18479 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18481 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18483 /* We may be reversing an unordered compare to a normal compare, which
18484 is not valid in general (we may convert a non-trapping condition
18485 to a trapping one); however, on i386 we currently emit all
18486 comparisons unordered. */
18487 compare_code = reverse_condition_maybe_unordered (compare_code);
18488 code = reverse_condition_maybe_unordered (code);
18492 compare_code = reverse_condition (compare_code);
18493 code = reverse_condition (code);
18497 compare_code = UNKNOWN;
18498 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18499 && CONST_INT_P (op1))
18501 if (op1 == const0_rtx
18502 && (code == LT || code == GE))
18503 compare_code = code;
18504 else if (op1 == constm1_rtx)
18508 else if (code == GT)
18513 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18514 if (compare_code != UNKNOWN
18515 && GET_MODE (op0) == GET_MODE (out)
18516 && (cf == -1 || ct == -1))
18518 /* If lea code below could be used, only optimize
18519 if it results in a 2 insn sequence. */
18521 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18522 || diff == 3 || diff == 5 || diff == 9)
18523 || (compare_code == LT && ct == -1)
18524 || (compare_code == GE && cf == -1))
18527 * notl op1 (if necessary)
18535 code = reverse_condition (code);
18538 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18540 out = expand_simple_binop (mode, IOR,
18542 out, 1, OPTAB_DIRECT);
18543 if (out != operands[0])
18544 emit_move_insn (operands[0], out);
18551 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18552 || diff == 3 || diff == 5 || diff == 9)
18553 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18555 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18561 * lea cf(dest*(ct-cf)),dest
18565 * This also catches the degenerate setcc-only case.
18571 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18574 /* On x86_64 the lea instruction operates on Pmode, so we need
18575 to get the arithmetic done in the proper mode to match. */
18577 tmp = copy_rtx (out);
18581 out1 = copy_rtx (out);
18582 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18586 tmp = gen_rtx_PLUS (mode, tmp, out1);
18592 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18595 if (!rtx_equal_p (tmp, out))
18598 out = force_operand (tmp, copy_rtx (out));
18600 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18602 if (!rtx_equal_p (out, operands[0]))
18603 emit_move_insn (operands[0], copy_rtx (out));
18609 * General case: Jumpful:
18610 * xorl dest,dest cmpl op1, op2
18611 * cmpl op1, op2 movl ct, dest
18612 * setcc dest jcc 1f
18613 * decl dest movl cf, dest
18614 * andl (cf-ct),dest 1:
18617 * Size 20. Size 14.
18619 * This is reasonably steep, but branch mispredict costs are
18620 * high on modern cpus, so consider failing only if optimizing for size. */
18624 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18625 && BRANCH_COST (optimize_insn_for_speed_p (),
18630 enum machine_mode cmp_mode = GET_MODE (op0);
18635 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18637 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18639 /* We may be reversing an unordered compare to a normal compare,
18640 which is not valid in general (we may convert a non-trapping
18641 condition to a trapping one); however, on i386 we currently
18642 emit all comparisons unordered. */
18643 code = reverse_condition_maybe_unordered (code);
18647 code = reverse_condition (code);
18648 if (compare_code != UNKNOWN)
18649 compare_code = reverse_condition (compare_code);
18653 if (compare_code != UNKNOWN)
18655 /* notl op1 (if needed)
18660 For x < 0 (resp. x <= -1) there will be no notl,
18661 so if possible swap the constants to get rid of the complement.
18663 True/false will be -1/0 while code below (store flag
18664 followed by decrement) is 0/-1, so the constants need
18665 to be exchanged once more. */
18667 if (compare_code == GE || !cf)
18669 code = reverse_condition (code);
18674 HOST_WIDE_INT tmp = cf;
18679 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18683 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18685 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18687 copy_rtx (out), 1, OPTAB_DIRECT);
18690 out = expand_simple_binop (mode, AND, copy_rtx (out),
18691 gen_int_mode (cf - ct, mode),
18692 copy_rtx (out), 1, OPTAB_DIRECT);
18694 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18695 copy_rtx (out), 1, OPTAB_DIRECT);
18696 if (!rtx_equal_p (out, operands[0]))
18697 emit_move_insn (operands[0], copy_rtx (out));
18703 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18705 /* Try a few more things with specific constants and a variable. */
18708 rtx var, orig_out, out, tmp;
18710 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18713 /* If one of the two operands is an interesting constant, load a
18714 constant with the above and mask it in with a logical operation. */
18716 if (CONST_INT_P (operands[2]))
18719 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18720 operands[3] = constm1_rtx, op = and_optab;
18721 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18722 operands[3] = const0_rtx, op = ior_optab;
18726 else if (CONST_INT_P (operands[3]))
18729 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18730 operands[2] = constm1_rtx, op = and_optab;
18731 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18732 operands[2] = const0_rtx, op = ior_optab;
18739 orig_out = operands[0];
18740 tmp = gen_reg_rtx (mode);
18743 /* Recurse to get the constant loaded. */
18744 if (ix86_expand_int_movcc (operands) == 0)
18747 /* Mask in the interesting variable. */
18748 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18750 if (!rtx_equal_p (out, orig_out))
18751 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18757 * For comparison with above,
18767 if (! nonimmediate_operand (operands[2], mode))
18768 operands[2] = force_reg (mode, operands[2]);
18769 if (! nonimmediate_operand (operands[3], mode))
18770 operands[3] = force_reg (mode, operands[3]);
18772 if (! register_operand (operands[2], VOIDmode)
18774 || ! register_operand (operands[3], VOIDmode)))
18775 operands[2] = force_reg (mode, operands[2]);
18778 && ! register_operand (operands[3], VOIDmode))
18779 operands[3] = force_reg (mode, operands[3]);
18781 emit_insn (compare_seq);
18782 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18783 gen_rtx_IF_THEN_ELSE (mode,
18784 compare_op, operands[2],
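/* Illustrative sketch (not GCC source): the branchless "setcc; dec; and;
   add" select from the size comparison above, computing cond ? ct : cf
   without a jump.  */
#include <assert.h>
#include <stdint.h>

static int32_t select_ct_cf (int cond, int32_t ct, int32_t cf)
{
  uint32_t x = (cond ? 1u : 0u) - 1u;	 /* setcc; decl: 0 or all-ones */
  x &= (uint32_t) cf - (uint32_t) ct;	 /* andl (cf - ct) */
  return (int32_t) (x + (uint32_t) ct);	 /* addl ct */
}

static void movcc_example (void)
{
  assert (select_ct_cf (1, 7, 9) == 7);
  assert (select_ct_cf (0, 7, 9) == 9);
}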
18789 /* Swap, force into registers, or otherwise massage the two operands
18790 to an sse comparison with a mask result. Thus we differ a bit from
18791 ix86_prepare_fp_compare_args which expects to produce a flags result.
18793 The DEST operand exists to help determine whether to commute commutative
18794 operators. The POP0/POP1 operands are updated in place. The new
18795 comparison code is returned, or UNKNOWN if not implementable. */
18797 static enum rtx_code
18798 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18799 rtx *pop0, rtx *pop1)
18807 /* AVX supports all the needed comparisons. */
18810 /* We have no LTGT as an operator. We could implement it with
18811 NE & ORDERED, but this requires an extra temporary. It's
18812 not clear that it's worth it. */
18819 /* These are supported directly. */
18826 /* AVX has 3-operand comparisons; no need to swap anything. */
18829 /* For commutative operators, try to canonicalize the destination
18830 operand to be first in the comparison - this helps reload to
18831 avoid extra moves. */
18832 if (!dest || !rtx_equal_p (dest, *pop1))
18840 /* These are not supported directly before AVX, and furthermore
18841 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
18842 comparison operands to transform into something that is
18847 code = swap_condition (code);
18851 gcc_unreachable ();
18857 /* Detect conditional moves that exactly match min/max operational
18858 semantics. Note that this is IEEE safe, as long as we don't
18859 interchange the operands.
18861 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18862 and TRUE if the operation is successful and instructions are emitted. */
18865 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18866 rtx cmp_op1, rtx if_true, rtx if_false)
18868 enum machine_mode mode;
18874 else if (code == UNGE)
18877 if_true = if_false;
18883 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18885 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18890 mode = GET_MODE (dest);
18892 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18893 but MODE may be a vector mode and thus not appropriate. */
18894 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18896 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18899 if_true = force_reg (mode, if_true);
18900 v = gen_rtvec (2, if_true, if_false);
18901 tmp = gen_rtx_UNSPEC (mode, v, u);
18905 code = is_min ? SMIN : SMAX;
18906 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18909 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
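/* Illustrative sketch (not GCC source): why operand order is significant.
   SSE minss behaves like a < b ? a : b, returning the *second* operand when
   the compare is unordered, so min/max recognition must not swap IF_TRUE
   and IF_FALSE.  */
#include <assert.h>
#include <math.h>

static double minss_model (double a, double b)
{
  return a < b ? a : b;		/* NaN compares false: b is returned */
}

static void minmax_example (void)
{
  assert (isnan (minss_model (1.0, NAN)));	/* NaN in second slot wins */
  assert (minss_model (NAN, 1.0) == 1.0);	/* NaN in first slot lost */
}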
18913 /* Expand an sse vector comparison. Return the register with the result. */
18916 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18917 rtx op_true, rtx op_false)
18919 enum machine_mode mode = GET_MODE (dest);
18920 enum machine_mode cmp_mode = GET_MODE (cmp_op0);
18923 cmp_op0 = force_reg (cmp_mode, cmp_op0);
18924 if (!nonimmediate_operand (cmp_op1, cmp_mode))
18925 cmp_op1 = force_reg (cmp_mode, cmp_op1);
18928 || reg_overlap_mentioned_p (dest, op_true)
18929 || reg_overlap_mentioned_p (dest, op_false))
18930 dest = gen_reg_rtx (mode);
18932 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
18933 if (cmp_mode != mode)
18935 x = force_reg (cmp_mode, x);
18936 convert_move (dest, x, false);
18939 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18944 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18945 operations. This is used for both scalar and vector conditional moves. */
18948 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18950 enum machine_mode mode = GET_MODE (dest);
18953 if (vector_all_ones_operand (op_true, mode)
18954 && rtx_equal_p (op_false, CONST0_RTX (mode)))
18956 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
18958 else if (op_false == CONST0_RTX (mode))
18960 op_true = force_reg (mode, op_true);
18961 x = gen_rtx_AND (mode, cmp, op_true);
18962 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18964 else if (op_true == CONST0_RTX (mode))
18966 op_false = force_reg (mode, op_false);
18967 x = gen_rtx_NOT (mode, cmp);
18968 x = gen_rtx_AND (mode, x, op_false);
18969 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18971 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
18973 op_false = force_reg (mode, op_false);
18974 x = gen_rtx_IOR (mode, cmp, op_false);
18975 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18977 else if (TARGET_XOP)
18979 op_true = force_reg (mode, op_true);
18981 if (!nonimmediate_operand (op_false, mode))
18982 op_false = force_reg (mode, op_false);
18984 emit_insn (gen_rtx_SET (mode, dest,
18985 gen_rtx_IF_THEN_ELSE (mode, cmp,
18991 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
18993 if (!nonimmediate_operand (op_true, mode))
18994 op_true = force_reg (mode, op_true);
18996 op_false = force_reg (mode, op_false);
19002 gen = gen_sse4_1_blendvps;
19006 gen = gen_sse4_1_blendvpd;
19014 gen = gen_sse4_1_pblendvb;
19015 dest = gen_lowpart (V16QImode, dest);
19016 op_false = gen_lowpart (V16QImode, op_false);
19017 op_true = gen_lowpart (V16QImode, op_true);
19018 cmp = gen_lowpart (V16QImode, cmp);
19023 gen = gen_avx_blendvps256;
19027 gen = gen_avx_blendvpd256;
19035 gen = gen_avx2_pblendvb;
19036 dest = gen_lowpart (V32QImode, dest);
19037 op_false = gen_lowpart (V32QImode, op_false);
19038 op_true = gen_lowpart (V32QImode, op_true);
19039 cmp = gen_lowpart (V32QImode, cmp);
19047 emit_insn (gen (dest, op_false, op_true, cmp));
19050 op_true = force_reg (mode, op_true);
19052 t2 = gen_reg_rtx (mode);
19054 t3 = gen_reg_rtx (mode);
19058 x = gen_rtx_AND (mode, op_true, cmp);
19059 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
19061 x = gen_rtx_NOT (mode, cmp);
19062 x = gen_rtx_AND (mode, x, op_false);
19063 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
19065 x = gen_rtx_IOR (mode, t3, t2);
19066 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
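/* Illustrative sketch (not GCC source): the AND/ANDN/IOR fallback emitted
   above.  CMP is an all-ones or all-zeros mask per element; one 32-bit
   lane of the select looks like this.  */
#include <assert.h>
#include <stdint.h>

static uint32_t mask_blend (uint32_t cmp, uint32_t op_true, uint32_t op_false)
{
  return (cmp & op_true) | (~cmp & op_false);	/* pand; pandn; por */
}

static void blend_example (void)
{
  assert (mask_blend (0xffffffffu, 7, 9) == 7);
  assert (mask_blend (0x00000000u, 7, 9) == 9);
}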
19071 /* Expand a floating-point conditional move. Return true if successful. */
19074 ix86_expand_fp_movcc (rtx operands[])
19076 enum machine_mode mode = GET_MODE (operands[0]);
19077 enum rtx_code code = GET_CODE (operands[1]);
19078 rtx tmp, compare_op;
19079 rtx op0 = XEXP (operands[1], 0);
19080 rtx op1 = XEXP (operands[1], 1);
19082 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
19084 enum machine_mode cmode;
19086 /* Since we've no cmove for sse registers, don't force bad register
19087 allocation just to gain access to it. Deny movcc when the
19088 comparison mode doesn't match the move mode. */
19089 cmode = GET_MODE (op0);
19090 if (cmode == VOIDmode)
19091 cmode = GET_MODE (op1);
19095 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
19096 if (code == UNKNOWN)
19099 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
19100 operands[2], operands[3]))
19103 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
19104 operands[2], operands[3]);
19105 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
19109 /* The floating point conditional move instructions don't directly
19110 support conditions resulting from a signed integer comparison. */
19112 compare_op = ix86_expand_compare (code, op0, op1);
19113 if (!fcmov_comparison_operator (compare_op, VOIDmode))
19115 tmp = gen_reg_rtx (QImode);
19116 ix86_expand_setcc (tmp, code, op0, op1);
19118 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
19121 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19122 gen_rtx_IF_THEN_ELSE (mode, compare_op,
19123 operands[2], operands[3])));
19128 /* Expand a floating-point vector conditional move; a vcond operation
19129 rather than a movcc operation. */
19132 ix86_expand_fp_vcond (rtx operands[])
19134 enum rtx_code code = GET_CODE (operands[3]);
19137 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
19138 &operands[4], &operands[5]);
19139 if (code == UNKNOWN)
19142 switch (GET_CODE (operands[3]))
19145 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
19146 operands[5], operands[0], operands[0]);
19147 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
19148 operands[5], operands[1], operands[2]);
19152 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
19153 operands[5], operands[0], operands[0]);
19154 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
19155 operands[5], operands[1], operands[2]);
19159 gcc_unreachable ();
19161 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
19163 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19167 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
19168 operands[5], operands[1], operands[2]))
19171 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
19172 operands[1], operands[2]);
19173 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19177 /* Expand a signed/unsigned integral vector conditional move. */
19180 ix86_expand_int_vcond (rtx operands[])
19182 enum machine_mode data_mode = GET_MODE (operands[0]);
19183 enum machine_mode mode = GET_MODE (operands[4]);
19184 enum rtx_code code = GET_CODE (operands[3]);
19185 bool negate = false;
19188 cop0 = operands[4];
19189 cop1 = operands[5];
19191 /* XOP supports all of the comparisons on all vector int types. */
19194 /* Canonicalize the comparison to EQ, GT, GTU. */
19205 code = reverse_condition (code);
19211 code = reverse_condition (code);
19217 code = swap_condition (code);
19218 x = cop0, cop0 = cop1, cop1 = x;
19222 gcc_unreachable ();
19225 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19226 if (mode == V2DImode)
19231 /* SSE4.1 supports EQ. */
19232 if (!TARGET_SSE4_1)
19238 /* SSE4.2 supports GT/GTU. */
19239 if (!TARGET_SSE4_2)
19244 gcc_unreachable ();
19248 /* Unsigned parallel compare is not supported by the hardware.
19249 Play some tricks to turn this into a signed comparison against 0. */
19253 cop0 = force_reg (mode, cop0);
19263 rtx (*gen_sub3) (rtx, rtx, rtx);
19267 case V8SImode: gen_sub3 = gen_subv8si3; break;
19268 case V4DImode: gen_sub3 = gen_subv4di3; break;
19269 case V4SImode: gen_sub3 = gen_subv4si3; break;
19270 case V2DImode: gen_sub3 = gen_subv2di3; break;
19272 gcc_unreachable ();
19274 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
19276 mask = ix86_build_signbit_mask (mode, true, false);
19277 t1 = gen_reg_rtx (mode);
19278 emit_insn (gen_sub3 (t1, cop0, mask));
19280 t2 = gen_reg_rtx (mode);
19281 emit_insn (gen_sub3 (t2, cop1, mask));
19293 /* Perform a parallel unsigned saturating subtraction. */
19294 x = gen_reg_rtx (mode);
19295 emit_insn (gen_rtx_SET (VOIDmode, x,
19296 gen_rtx_US_MINUS (mode, cop0, cop1)));
19299 cop1 = CONST0_RTX (mode);
19305 gcc_unreachable ();
19310 /* Allow the comparison to be done in one mode, but the movcc to
19311 happen in another mode. */
19312 if (data_mode == mode)
19314 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19315 operands[1+negate], operands[2-negate]);
19319 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
19320 x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
19322 operands[1+negate], operands[2-negate]);
19323 x = gen_lowpart (data_mode, x);
19326 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19327 operands[2-negate]);
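/* Illustrative sketch (not GCC source): the signed-bias trick used above
   when the hardware only has signed greater-than.  Subtracting INT_MIN
   (equivalently, XORing the sign bit) from both operands preserves their
   unsigned order while mapping it onto signed order.  */
#include <assert.h>
#include <stdint.h>

static int gtu_via_signed (uint32_t a, uint32_t b)
{
  int32_t sa = (int32_t) (a ^ 0x80000000u);	/* a - INT_MIN (mod 2^32) */
  int32_t sb = (int32_t) (b ^ 0x80000000u);
  return sa > sb;				/* signed pcmpgt */
}

static void vcond_example (void)
{
  assert (gtu_via_signed (0xffffffffu, 1u));
  assert (!gtu_via_signed (1u, 2u));
}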
19331 /* Expand a variable vector permutation. */
19334 ix86_expand_vec_perm (rtx operands[])
19336 rtx target = operands[0];
19337 rtx op0 = operands[1];
19338 rtx op1 = operands[2];
19339 rtx mask = operands[3];
19340 rtx t1, t2, t3, t4, vt, vt2, vec[32];
19341 enum machine_mode mode = GET_MODE (op0);
19342 enum machine_mode maskmode = GET_MODE (mask);
19344 bool one_operand_shuffle = rtx_equal_p (op0, op1);
19346 /* Number of elements in the vector. */
19347 w = GET_MODE_NUNITS (mode);
19348 e = GET_MODE_UNIT_SIZE (mode);
19349 gcc_assert (w <= 32);
19353 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
19355 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
19356 a constant shuffle operand. With a tiny bit of effort we can
19357 use VPERMD instead. A re-interpretation stall for V4DFmode is
19358 unfortunate but there's no avoiding it.
19359 Similarly for V16HImode we don't have instructions for variable
19360 shuffling, while for V32QImode we can use vpshufb; vpshufb;
19361 vpermq; vpor after preparing suitable masks. */
19363 if (mode == V16HImode)
19365 maskmode = mode = V32QImode;
19371 maskmode = mode = V8SImode;
19375 t1 = gen_reg_rtx (maskmode);
19377 /* Replicate the low bits of the V4DImode mask into V8SImode:
19379 mask = { A B C D }, t1 = { A A B B C C D D }. */
19380 for (i = 0; i < w / 2; ++i)
19381 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
19382 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19383 vt = force_reg (maskmode, vt);
19384 mask = gen_lowpart (maskmode, mask);
19385 if (maskmode == V8SImode)
19386 emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
19388 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
19390 /* Multiply the shuffle indices by two. */
19391 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
19394 /* Add one to the odd shuffle indices:
19395 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
19396 for (i = 0; i < w / 2; ++i)
19398 vec[i * 2] = const0_rtx;
19399 vec[i * 2 + 1] = const1_rtx;
19401 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19402 vt = force_const_mem (maskmode, vt);
19403 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
19406 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
19407 operands[3] = mask = t1;
19408 target = gen_lowpart (mode, target);
19409 op0 = gen_lowpart (mode, op0);
19410 op1 = gen_lowpart (mode, op1);
19416 /* The VPERMD and VPERMPS instructions already properly ignore
19417 the high bits of the shuffle elements. No need for us to
19418 perform an AND ourselves. */
19419 if (one_operand_shuffle)
19420 emit_insn (gen_avx2_permvarv8si (target, mask, op0));
19423 t1 = gen_reg_rtx (V8SImode);
19424 t2 = gen_reg_rtx (V8SImode);
19425 emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
19426 emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
19432 mask = gen_lowpart (V8SFmode, mask);
19433 if (one_operand_shuffle)
19434 emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
19437 t1 = gen_reg_rtx (V8SFmode);
19438 t2 = gen_reg_rtx (V8SFmode);
19439 emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
19440 emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
19446 /* By combining the two 128-bit input vectors into one 256-bit
19447 input vector, we can use VPERMD and VPERMPS for the full
19448 two-operand shuffle. */
19449 t1 = gen_reg_rtx (V8SImode);
19450 t2 = gen_reg_rtx (V8SImode);
19451 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
19452 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
19453 emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
19454 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
19458 t1 = gen_reg_rtx (V8SFmode);
19459 t2 = gen_reg_rtx (V8SFmode);
19460 mask = gen_lowpart (V4SFmode, mask);
19461 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
19462 emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
19463 emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
19464 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
19468 t1 = gen_reg_rtx (V32QImode);
19469 t2 = gen_reg_rtx (V32QImode);
19470 t3 = gen_reg_rtx (V32QImode);
19471 vt2 = GEN_INT (128);
19472 for (i = 0; i < 32; i++)
19474 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
19475 vt = force_reg (V32QImode, vt);
19476 for (i = 0; i < 32; i++)
19477 vec[i] = i < 16 ? vt2 : const0_rtx;
19478 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
19479 vt2 = force_reg (V32QImode, vt2);
19480 /* From mask create two adjusted masks, which contain the same
19481 bits as mask in the low 7 bits of each vector element.
19482 The first mask will have the most significant bit clear
19483 if it requests element from the same 128-bit lane
19484 and MSB set if it requests element from the other 128-bit lane.
19485 The second mask will have the opposite values of the MSB,
19486 and additionally will have its 128-bit lanes swapped.
19487 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
19488 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
19489 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
19490 stands for the other 12 bytes. */
19491 /* The bit telling whether an element is from the same lane or the
19492 other lane is bit 4, so shift it up by 3 to the MSB position. */
19493 emit_insn (gen_avx2_lshlv4di3 (gen_lowpart (V4DImode, t1),
19494 gen_lowpart (V4DImode, mask),
19496 /* Clear MSB bits from the mask just in case it had them set. */
19497 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
19498 /* After this t1 will have MSB set for elements from the other lane. */
19499 emit_insn (gen_xorv32qi3 (t1, t1, vt2));
19500 /* Clear bits other than MSB. */
19501 emit_insn (gen_andv32qi3 (t1, t1, vt));
19502 /* Or in the lower bits from mask into t3. */
19503 emit_insn (gen_iorv32qi3 (t3, t1, t2));
19504 /* And invert MSB bits in t1, so MSB is set for elements from the same lane. */
19506 emit_insn (gen_xorv32qi3 (t1, t1, vt));
19507 /* Swap 128-bit lanes in t3. */
19508 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
19509 gen_lowpart (V4DImode, t3),
19510 const2_rtx, GEN_INT (3),
19511 const0_rtx, const1_rtx));
19512 /* And OR in the lower bits from mask into t1. */
19513 emit_insn (gen_iorv32qi3 (t1, t1, t2));
19514 if (one_operand_shuffle)
19516 /* Each of these shuffles will put 0s in places where an
19517 element from the other 128-bit lane is needed, and otherwise
19518 will shuffle in the requested value. */
19519 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
19520 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
19521 /* For t3 the 128-bit lanes are swapped again. */
19522 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
19523 gen_lowpart (V4DImode, t3),
19524 const2_rtx, GEN_INT (3),
19525 const0_rtx, const1_rtx));
19526 /* And ORing both together leads to the result. */
19527 emit_insn (gen_iorv32qi3 (target, t1, t3));
19531 t4 = gen_reg_rtx (V32QImode);
19532 /* Similar to the one_operand_shuffle code above, just repeated
19533 twice for each operand. The merge_two: code below will merge
19534 the two results together. */
19535 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
19536 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
19537 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
19538 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
19539 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
19540 gen_lowpart (V4DImode, t4),
19541 const2_rtx, GEN_INT (3),
19542 const0_rtx, const1_rtx));
19543 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
19544 gen_lowpart (V4DImode, t3),
19545 const2_rtx, GEN_INT (3),
19546 const0_rtx, const1_rtx));
19547 emit_insn (gen_iorv32qi3 (t4, t2, t4));
19548 emit_insn (gen_iorv32qi3 (t3, t1, t3));
19554 gcc_assert (GET_MODE_SIZE (mode) <= 16);
19561 /* The XOP VPPERM insn supports three inputs. By ignoring the
19562 one_operand_shuffle special case, we avoid creating another
19563 set of constant vectors in memory. */
19564 one_operand_shuffle = false;
19566 /* mask = mask & {2*w-1, ...} */
19567 vt = GEN_INT (2*w - 1);
19571 /* mask = mask & {w-1, ...} */
19572 vt = GEN_INT (w - 1);
19575 for (i = 0; i < w; i++)
19577 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19578 mask = expand_simple_binop (maskmode, AND, mask, vt,
19579 NULL_RTX, 0, OPTAB_DIRECT);
19581 /* For non-QImode operations, convert the word permutation control
19582 into a byte permutation control. */
19583 if (mode != V16QImode)
19585 mask = expand_simple_binop (maskmode, ASHIFT, mask,
19586 GEN_INT (exact_log2 (e)),
19587 NULL_RTX, 0, OPTAB_DIRECT);
19589 /* Convert mask to vector of chars. */
19590 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
19592 /* Replicate each of the input bytes into byte positions:
19593 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
19594 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
19595 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
19596 for (i = 0; i < 16; ++i)
19597 vec[i] = GEN_INT (i/e * e);
19598 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
19599 vt = force_const_mem (V16QImode, vt);
19601 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
19603 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
19605 /* Convert it into the byte positions by doing
19606 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
19607 for (i = 0; i < 16; ++i)
19608 vec[i] = GEN_INT (i % e);
19609 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
19610 vt = force_const_mem (V16QImode, vt);
19611 emit_insn (gen_addv16qi3 (mask, mask, vt));
19614 /* The actual shuffle operations all operate on V16QImode. */
19615 op0 = gen_lowpart (V16QImode, op0);
19616 op1 = gen_lowpart (V16QImode, op1);
19617 target = gen_lowpart (V16QImode, target);
19621 emit_insn (gen_xop_pperm (target, op0, op1, mask));
19623 else if (one_operand_shuffle)
19625 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
19632 /* Shuffle the two input vectors independently. */
19633 t1 = gen_reg_rtx (V16QImode);
19634 t2 = gen_reg_rtx (V16QImode);
19635 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
19636 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
19639 /* Then merge them together. The key is whether any given control
19640 element contained a bit set that indicates the second word. */
19641 mask = operands[3];
19643 if (maskmode == V2DImode && !TARGET_SSE4_1)
19645 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
19646 more shuffle to convert the V2DI input mask into a V4SI
19647 input mask, at which point the masking done by
19648 expand_int_vcond will work as desired. */
19649 rtx t3 = gen_reg_rtx (V4SImode);
19650 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
19651 const0_rtx, const0_rtx,
19652 const2_rtx, const2_rtx));
19654 maskmode = V4SImode;
19658 for (i = 0; i < w; i++)
19660 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19661 vt = force_reg (maskmode, vt);
19662 mask = expand_simple_binop (maskmode, AND, mask, vt,
19663 NULL_RTX, 0, OPTAB_DIRECT);
19665 xops[0] = gen_lowpart (mode, operands[0]);
19666 xops[1] = gen_lowpart (mode, t2);
19667 xops[2] = gen_lowpart (mode, t1);
19668 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
19671 ok = ix86_expand_int_vcond (xops);
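/* Illustrative sketch (not GCC source): the pshufb building block the
   expander above leans on.  Each destination byte is selected by the low
   four bits of its control byte, or forced to zero when the control byte's
   MSB is set -- the property exploited by the cross-lane mask games.  */
#include <stdint.h>

static void pshufb_model (uint8_t dst[16], const uint8_t src[16],
			  const uint8_t ctl[16])
{
  for (int i = 0; i < 16; i++)
    dst[i] = (ctl[i] & 0x80) ? 0 : src[ctl[i] & 0x0f];
}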
19676 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
19677 true if we should do zero extension, else sign extension. HIGH_P is
19678 true if we want the N/2 high elements, else the low elements. */
19681 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
19683 enum machine_mode imode = GET_MODE (operands[1]);
19688 rtx (*unpack)(rtx, rtx);
19689 rtx (*extract)(rtx, rtx) = NULL;
19690 enum machine_mode halfmode = BLKmode;
19696 unpack = gen_avx2_zero_extendv16qiv16hi2;
19698 unpack = gen_avx2_sign_extendv16qiv16hi2;
19699 halfmode = V16QImode;
19701 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
19705 unpack = gen_avx2_zero_extendv8hiv8si2;
19707 unpack = gen_avx2_sign_extendv8hiv8si2;
19708 halfmode = V8HImode;
19710 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
19714 unpack = gen_avx2_zero_extendv4siv4di2;
19716 unpack = gen_avx2_sign_extendv4siv4di2;
19717 halfmode = V4SImode;
19719 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
19723 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
19725 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
19729 unpack = gen_sse4_1_zero_extendv4hiv4si2;
19731 unpack = gen_sse4_1_sign_extendv4hiv4si2;
19735 unpack = gen_sse4_1_zero_extendv2siv2di2;
19737 unpack = gen_sse4_1_sign_extendv2siv2di2;
19740 gcc_unreachable ();
19743 if (GET_MODE_SIZE (imode) == 32)
19745 tmp = gen_reg_rtx (halfmode);
19746 emit_insn (extract (tmp, operands[1]));
19750 /* Shift the higher 8 bytes into the lower 8 bytes. */
19751 tmp = gen_reg_rtx (imode);
19752 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
19753 gen_lowpart (V1TImode, operands[1]),
19759 emit_insn (unpack (operands[0], tmp));
19763 rtx (*unpack)(rtx, rtx, rtx);
19769 unpack = gen_vec_interleave_highv16qi;
19771 unpack = gen_vec_interleave_lowv16qi;
19775 unpack = gen_vec_interleave_highv8hi;
19777 unpack = gen_vec_interleave_lowv8hi;
19781 unpack = gen_vec_interleave_highv4si;
19783 unpack = gen_vec_interleave_lowv4si;
19786 gcc_unreachable ();
19789 dest = gen_lowpart (imode, operands[0]);
19792 tmp = force_reg (imode, CONST0_RTX (imode));
19794 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
19795 operands[1], pc_rtx, pc_rtx);
19797 emit_insn (unpack (dest, operands[1], tmp));
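/* Illustrative sketch (not GCC source): the pre-SSE4.1 unpack path above.
   Widening is an interleave: the extension bytes are all zero for zero
   extension, or an all-ones-if-negative mask (computed as 0 > x) for sign
   extension.  Low-half u8 -> u16 on little-endian x86 looks like this.  */
#include <stdint.h>

static void unpack_lo_bytes (uint8_t out[16], const uint8_t in[16],
			     const uint8_t ext[16])
{
  for (int i = 0; i < 8; i++)
    {
      out[2 * i]     = in[i];	/* low byte of the widened element */
      out[2 * i + 1] = ext[i];	/* 0, or 0xff when sign-extending a
				   negative element */
    }
}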
19801 /* Expand conditional increment or decrement using adc/sbb instructions.
19802 The default case using setcc followed by the conditional move can be
19803 done by generic code. */
19805 ix86_expand_int_addcc (rtx operands[])
19807 enum rtx_code code = GET_CODE (operands[1]);
19809 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
19811 rtx val = const0_rtx;
19812 bool fpcmp = false;
19813 enum machine_mode mode;
19814 rtx op0 = XEXP (operands[1], 0);
19815 rtx op1 = XEXP (operands[1], 1);
19817 if (operands[3] != const1_rtx
19818 && operands[3] != constm1_rtx)
19820 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19822 code = GET_CODE (compare_op);
19824 flags = XEXP (compare_op, 0);
19826 if (GET_MODE (flags) == CCFPmode
19827 || GET_MODE (flags) == CCFPUmode)
19830 code = ix86_fp_compare_code_to_integer (code);
19837 PUT_CODE (compare_op,
19838 reverse_condition_maybe_unordered
19839 (GET_CODE (compare_op)));
19841 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
19844 mode = GET_MODE (operands[0]);
19846 /* Construct either adc or sbb insn. */
19847 if ((code == LTU) == (operands[3] == constm1_rtx))
19852 insn = gen_subqi3_carry;
19855 insn = gen_subhi3_carry;
19858 insn = gen_subsi3_carry;
19861 insn = gen_subdi3_carry;
19864 gcc_unreachable ();
19872 insn = gen_addqi3_carry;
19875 insn = gen_addhi3_carry;
19878 insn = gen_addsi3_carry;
19881 insn = gen_adddi3_carry;
19884 gcc_unreachable ();
19887 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
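/* Illustrative sketch (not GCC source): the adc/sbb conditional increment
   built above.  Once a compare leaves the condition in CF, "adc $0, x"
   adds exactly the carry.  */
#include <assert.h>
#include <stdint.h>

static uint32_t add_if_ltu (uint32_t x, uint32_t a, uint32_t b)
{
  return x + (a < b);	/* cmp a, b; adc $0, x */
}

static void addcc_example (void)
{
  assert (add_if_ltu (10, 1, 2) == 11);
  assert (add_if_ltu (10, 2, 1) == 10);
}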
19893 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
19894 but works for floating point parameters and non-offsettable memories.
19895 For pushes, it returns just stack offsets; the values will be saved
19896 in the right order. Maximally four parts are generated. */
19899 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
19904 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
19906 size = (GET_MODE_SIZE (mode) + 4) / 8;
19908 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
19909 gcc_assert (size >= 2 && size <= 4);
19911 /* Optimize constant pool references to immediates. This is used by fp
19912 moves, which force all constants to memory to allow combining. */
19913 if (MEM_P (operand) && MEM_READONLY_P (operand))
19915 rtx tmp = maybe_get_pool_constant (operand);
19920 if (MEM_P (operand) && !offsettable_memref_p (operand))
19922 /* The only non-offsettable memories we handle are pushes. */
19923 int ok = push_operand (operand, VOIDmode);
19927 operand = copy_rtx (operand);
19928 PUT_MODE (operand, Pmode);
19929 parts[0] = parts[1] = parts[2] = parts[3] = operand;
19933 if (GET_CODE (operand) == CONST_VECTOR)
19935 enum machine_mode imode = int_mode_for_mode (mode);
19936 /* Caution: if we looked through a constant pool memory above,
19937 the operand may actually have a different mode now. That's
19938 ok, since we want to pun this all the way back to an integer. */
19939 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
19940 gcc_assert (operand != NULL);
19946 if (mode == DImode)
19947 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19952 if (REG_P (operand))
19954 gcc_assert (reload_completed);
19955 for (i = 0; i < size; i++)
19956 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19958 else if (offsettable_memref_p (operand))
19960 operand = adjust_address (operand, SImode, 0);
19961 parts[0] = operand;
19962 for (i = 1; i < size; i++)
19963 parts[i] = adjust_address (operand, SImode, 4 * i);
19965 else if (GET_CODE (operand) == CONST_DOUBLE)
19970 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19974 real_to_target (l, &r, mode);
19975 parts[3] = gen_int_mode (l[3], SImode);
19976 parts[2] = gen_int_mode (l[2], SImode);
19979 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19980 parts[2] = gen_int_mode (l[2], SImode);
19983 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19986 gcc_unreachable ();
19988 parts[1] = gen_int_mode (l[1], SImode);
19989 parts[0] = gen_int_mode (l[0], SImode);
19992 gcc_unreachable ();
19997 if (mode == TImode)
19998 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19999 if (mode == XFmode || mode == TFmode)
20001 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
20002 if (REG_P (operand))
20004 gcc_assert (reload_completed);
20005 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
20006 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
20008 else if (offsettable_memref_p (operand))
20010 operand = adjust_address (operand, DImode, 0);
20011 parts[0] = operand;
20012 parts[1] = adjust_address (operand, upper_mode, 8);
20014 else if (GET_CODE (operand) == CONST_DOUBLE)
20019 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
20020 real_to_target (l, &r, mode);
20022 /* Do not use shift by 32 to avoid warning on 32bit systems. */
20023 if (HOST_BITS_PER_WIDE_INT >= 64)
20026 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
20027 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
20030 parts[0] = immed_double_const (l[0], l[1], DImode);
20032 if (upper_mode == SImode)
20033 parts[1] = gen_int_mode (l[2], SImode);
20034 else if (HOST_BITS_PER_WIDE_INT >= 64)
20037 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
20038 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
20041 parts[1] = immed_double_const (l[2], l[3], DImode);
20044 gcc_unreachable ();
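/* Illustrative sketch (not GCC source): the little-endian decomposition
   ix86_split_to_parts performs, shown for a 64-bit value split into two
   SImode words.  */
#include <assert.h>
#include <stdint.h>

static void split_di_model (uint64_t x, uint32_t parts[2])
{
  parts[0] = (uint32_t) x;		/* low word comes first */
  parts[1] = (uint32_t) (x >> 32);	/* high word second */
}

static void split_example (void)
{
  uint32_t p[2];
  split_di_model (0x0123456789abcdefull, p);
  assert (p[0] == 0x89abcdefu && p[1] == 0x01234567u);
}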
20051 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
20052 Return false when normal moves are needed; true when all required
20053 insns have been emitted. Operands 2-4 contain the input values
20054 in the correct order; operands 5-7 contain the output values. */
20057 ix86_split_long_move (rtx operands[])
20062 int collisions = 0;
20063 enum machine_mode mode = GET_MODE (operands[0]);
20064 bool collisionparts[4];
20066 /* The DFmode expanders may ask us to move a double.
20067 For a 64-bit target this is a single move. By hiding this fact
20068 here we simplify the i386.md splitters. */
20069 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
20071 /* Optimize constant-pool references into immediates. This is used by
20072 fp moves that force all constants to memory to allow combining. */
20074 if (MEM_P (operands[1])
20075 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
20076 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
20077 operands[1] = get_pool_constant (XEXP (operands[1], 0));
20078 if (push_operand (operands[0], VOIDmode))
20080 operands[0] = copy_rtx (operands[0]);
20081 PUT_MODE (operands[0], Pmode);
20084 operands[0] = gen_lowpart (DImode, operands[0]);
20085 operands[1] = gen_lowpart (DImode, operands[1]);
20086 emit_move_insn (operands[0], operands[1]);
20090 /* The only non-offsettable memory we handle is a push. */
20091 if (push_operand (operands[0], VOIDmode))
20094 gcc_assert (!MEM_P (operands[0])
20095 || offsettable_memref_p (operands[0]));
20097 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
20098 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
20100 /* When emitting a push, take care of source operands on the stack. */
20101 if (push && MEM_P (operands[1])
20102 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
20104 rtx src_base = XEXP (part[1][nparts - 1], 0);
20106 /* Compensate for the stack decrement by 4. */
20107 if (!TARGET_64BIT && nparts == 3
20108 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
20109 src_base = plus_constant (src_base, 4);
20111 /* src_base refers to the stack pointer and is
20112 automatically decreased by each emitted push. */
20113 for (i = 0; i < nparts; i++)
20114 part[1][i] = change_address (part[1][i],
20115 GET_MODE (part[1][i]), src_base);
20118 /* We need to do the copy in the right order in case an address register
20119 of the source overlaps the destination. */
20120 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
20124 for (i = 0; i < nparts; i++)
20127 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
20128 if (collisionparts[i])
20132 /* Collision in the middle part can be handled by reordering. */
20133 if (collisions == 1 && nparts == 3 && collisionparts [1])
20135 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20136 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20138 else if (collisions == 1
20140 && (collisionparts [1] || collisionparts [2]))
20142 if (collisionparts [1])
20144 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20145 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20149 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
20150 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
20154 /* If there are more collisions, we can't handle them by reordering.
20155 Do an lea to the last part and use only one colliding move. */
20156 else if (collisions > 1)
20162 base = part[0][nparts - 1];
20164 /* Handle the case when the last part isn't valid for lea.
20165 This happens in 64-bit mode when storing the 12-byte XFmode. */
20166 if (GET_MODE (base) != Pmode)
20167 base = gen_rtx_REG (Pmode, REGNO (base));
20169 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
20170 part[1][0] = replace_equiv_address (part[1][0], base);
20171 for (i = 1; i < nparts; i++)
20173 tmp = plus_constant (base, UNITS_PER_WORD * i);
20174 part[1][i] = replace_equiv_address (part[1][i], tmp);
20185 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
20186 emit_insn (gen_addsi3 (stack_pointer_rtx,
20187 stack_pointer_rtx, GEN_INT (-4)));
20188 emit_move_insn (part[0][2], part[1][2]);
20190 else if (nparts == 4)
20192 emit_move_insn (part[0][3], part[1][3]);
20193 emit_move_insn (part[0][2], part[1][2]);
20198 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
20199 register, that is OK: we will just use the larger counterpart. We also
20200 retype memory; these retypings come from an attempt to avoid the REX prefix
20201 when moving the second half of a TFmode value. */
20202 if (GET_MODE (part[1][1]) == SImode)
20204 switch (GET_CODE (part[1][1]))
20207 part[1][1] = adjust_address (part[1][1], DImode, 0);
20211 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
20215 gcc_unreachable ();
20218 if (GET_MODE (part[1][0]) == SImode)
20219 part[1][0] = part[1][1];
20222 emit_move_insn (part[0][1], part[1][1]);
20223 emit_move_insn (part[0][0], part[1][0]);
20227 /* Choose the correct order so as not to overwrite the source before it is copied. */
20228 if ((REG_P (part[0][0])
20229 && REG_P (part[1][1])
20230 && (REGNO (part[0][0]) == REGNO (part[1][1])
20232 && REGNO (part[0][0]) == REGNO (part[1][2]))
20234 && REGNO (part[0][0]) == REGNO (part[1][3]))))
20236 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
20238 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
20240 operands[2 + i] = part[0][j];
20241 operands[6 + i] = part[1][j];
20246 for (i = 0; i < nparts; i++)
20248 operands[2 + i] = part[0][i];
20249 operands[6 + i] = part[1][i];
20253 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
20254 if (optimize_insn_for_size_p ())
20256 for (j = 0; j < nparts - 1; j++)
20257 if (CONST_INT_P (operands[6 + j])
20258 && operands[6 + j] != const0_rtx
20259 && REG_P (operands[2 + j]))
20260 for (i = j; i < nparts - 1; i++)
20261 if (CONST_INT_P (operands[7 + i])
20262 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
20263 operands[7 + i] = operands[2 + j];
20266 for (i = 0; i < nparts; i++)
20267 emit_move_insn (operands[2 + i], operands[6 + i]);
20272 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
20273 left shift by a constant, either using a single shift or
20274 a sequence of add instructions. */
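/* Illustrative sketch, not GCC source: X + X == X << 1, so a small
   constant left shift can be synthesized from adds when adds are cheaper
   than the shift on the target:  */
#if 0
static unsigned int
ashl_by_adds_example (unsigned int x)
{
  x += x;	/* x << 1 */
  x += x;	/* x << 2 */
  return x;
}
#endif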
20277 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
20279 rtx (*insn)(rtx, rtx, rtx);
20282 || (count * ix86_cost->add <= ix86_cost->shift_const
20283 && !optimize_insn_for_size_p ()))
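/* Note that MODE is the whole double-word mode while OPERAND is one of
   its half-word parts, so the SImode generators pair with DImode here
   and vice versa.  */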
20285 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
20286 while (count-- > 0)
20287 emit_insn (insn (operand, operand, operand));
20291 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20292 emit_insn (insn (operand, operand, GEN_INT (count)));
20297 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
20299 rtx (*gen_ashl3)(rtx, rtx, rtx);
20300 rtx (*gen_shld)(rtx, rtx, rtx);
20301 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20303 rtx low[2], high[2];
20306 if (CONST_INT_P (operands[2]))
20308 split_double_mode (mode, operands, 2, low, high);
20309 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20311 if (count >= half_width)
20313 emit_move_insn (high[0], low[1]);
20314 emit_move_insn (low[0], const0_rtx);
20316 if (count > half_width)
20317 ix86_expand_ashl_const (high[0], count - half_width, mode);
20321 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20323 if (!rtx_equal_p (operands[0], operands[1]))
20324 emit_move_insn (operands[0], operands[1]);
20326 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
20327 ix86_expand_ashl_const (low[0], count, mode);
20332 split_double_mode (mode, operands, 1, low, high);
20334 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20336 if (operands[1] == const1_rtx)
20338 /* Assuming we've chosen QImode-capable registers, 1 << N
20339 can be done with two 32/64-bit shifts, no branches, no cmoves. */
20340 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
20342 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
20344 ix86_expand_clear (low[0]);
20345 ix86_expand_clear (high[0]);
20346 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
20348 d = gen_lowpart (QImode, low[0]);
20349 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20350 s = gen_rtx_EQ (QImode, flags, const0_rtx);
20351 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20353 d = gen_lowpart (QImode, high[0]);
20354 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20355 s = gen_rtx_NE (QImode, flags, const0_rtx);
20356 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20359 /* Otherwise, we can get the same results by manually performing
20360 a bit extract operation on bit 5/6, and then performing the two
20361 shifts. The two methods of getting 0/1 into low/high are exactly
20362 the same size. Avoiding the shift in the bit extract case helps
20363 pentium4 a bit; no one else seems to care much either way. */
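/* Worked example, illustrative: for DImode on a 32-bit target half_width
   is 32, so bit 5 of the shift count decides the half: (count >> 5) & 1
   is 1 exactly when count >= 32, i.e. when the single set bit of
   1 << count lands in the high word; XORing with 1 selects the low word. */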
20366 enum machine_mode half_mode;
20367 rtx (*gen_lshr3)(rtx, rtx, rtx);
20368 rtx (*gen_and3)(rtx, rtx, rtx);
20369 rtx (*gen_xor3)(rtx, rtx, rtx);
20370 HOST_WIDE_INT bits;
20373 if (mode == DImode)
20375 half_mode = SImode;
20376 gen_lshr3 = gen_lshrsi3;
20377 gen_and3 = gen_andsi3;
20378 gen_xor3 = gen_xorsi3;
20383 half_mode = DImode;
20384 gen_lshr3 = gen_lshrdi3;
20385 gen_and3 = gen_anddi3;
20386 gen_xor3 = gen_xordi3;
20390 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
20391 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
20393 x = gen_lowpart (half_mode, operands[2]);
20394 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
20396 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
20397 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
20398 emit_move_insn (low[0], high[0]);
20399 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
20402 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20403 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
20407 if (operands[1] == constm1_rtx)
20409 /* For -1 << N, we can avoid the shld instruction, because we
20410 know that we're shifting 0...31/63 ones into a -1. */
20411 emit_move_insn (low[0], constm1_rtx);
20412 if (optimize_insn_for_size_p ())
20413 emit_move_insn (high[0], low[0]);
20415 emit_move_insn (high[0], constm1_rtx);
20419 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20421 if (!rtx_equal_p (operands[0], operands[1]))
20422 emit_move_insn (operands[0], operands[1]);
20424 split_double_mode (mode, operands, 1, low, high);
20425 emit_insn (gen_shld (high[0], low[0], operands[2]));
20428 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20430 if (TARGET_CMOVE && scratch)
20432 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20433 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20435 ix86_expand_clear (scratch);
20436 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
20440 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20441 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20443 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
20448 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
20450 rtx (*gen_ashr3)(rtx, rtx, rtx)
20451 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
20452 rtx (*gen_shrd)(rtx, rtx, rtx);
20453 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20455 rtx low[2], high[2];
20458 if (CONST_INT_P (operands[2]))
20460 split_double_mode (mode, operands, 2, low, high);
20461 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20463 if (count == GET_MODE_BITSIZE (mode) - 1)
20465 emit_move_insn (high[0], high[1]);
20466 emit_insn (gen_ashr3 (high[0], high[0],
20467 GEN_INT (half_width - 1)));
20468 emit_move_insn (low[0], high[0]);
20471 else if (count >= half_width)
20473 emit_move_insn (low[0], high[1]);
20474 emit_move_insn (high[0], low[0]);
20475 emit_insn (gen_ashr3 (high[0], high[0],
20476 GEN_INT (half_width - 1)));
20478 if (count > half_width)
20479 emit_insn (gen_ashr3 (low[0], low[0],
20480 GEN_INT (count - half_width)));
20484 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20486 if (!rtx_equal_p (operands[0], operands[1]))
20487 emit_move_insn (operands[0], operands[1]);
20489 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
20490 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
20495 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20497 if (!rtx_equal_p (operands[0], operands[1]))
20498 emit_move_insn (operands[0], operands[1]);
20500 split_double_mode (mode, operands, 1, low, high);
20502 emit_insn (gen_shrd (low[0], high[0], operands[2]));
20503 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
20505 if (TARGET_CMOVE && scratch)
20507 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20508 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20510 emit_move_insn (scratch, high[0]);
20511 emit_insn (gen_ashr3 (scratch, scratch,
20512 GEN_INT (half_width - 1)));
20513 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
20518 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
20519 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
20521 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
20527 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
20529 rtx (*gen_lshr3)(rtx, rtx, rtx)
20530 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
20531 rtx (*gen_shrd)(rtx, rtx, rtx);
20532 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20534 rtx low[2], high[2];
20537 if (CONST_INT_P (operands[2]))
20539 split_double_mode (mode, operands, 2, low, high);
20540 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20542 if (count >= half_width)
20544 emit_move_insn (low[0], high[1]);
20545 ix86_expand_clear (high[0]);
20547 if (count > half_width)
20548 emit_insn (gen_lshr3 (low[0], low[0],
20549 GEN_INT (count - half_width)));
20553 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20555 if (!rtx_equal_p (operands[0], operands[1]))
20556 emit_move_insn (operands[0], operands[1]);
20558 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
20559 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
20564 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20566 if (!rtx_equal_p (operands[0], operands[1]))
20567 emit_move_insn (operands[0], operands[1]);
20569 split_double_mode (mode, operands, 1, low, high);
20571 emit_insn (gen_shrd (low[0], high[0], operands[2]));
20572 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
20574 if (TARGET_CMOVE && scratch)
20576 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20577 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20579 ix86_expand_clear (scratch);
20580 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
20585 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20586 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20588 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
20593 /* Predict just emitted jump instruction to be taken with probability PROB. */
20595 predict_jump (int prob)
20597 rtx insn = get_last_insn ();
20598 gcc_assert (JUMP_P (insn));
20599 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
20602 /* Helper function for the string operations below. Test whether VARIABLE
20603 is aligned to VALUE bytes. If so, jump to the label. */
20603 it is aligned to VALUE bytes. If true, jump to the label. */
20605 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
20607 rtx label = gen_label_rtx ();
20608 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
20609 if (GET_MODE (variable) == DImode)
20610 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
20612 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
20613 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
20616 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20618 predict_jump (REG_BR_PROB_BASE * 90 / 100);
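/* Illustrative sketch, not GCC source: the emitted test behaves like the
   following C, skipping the chunk handling when the masked bits are 0:  */
#if 0
static void
aligntest_example (unsigned long variable, unsigned long value)
{
  if ((variable & value) == 0)
    goto skip;
  /* ... code handling a VALUE-byte chunk goes here ... */
 skip:;
}
#endif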
20622 /* Adjust COUNTREG by VALUE. */
20624 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
20626 rtx (*gen_add)(rtx, rtx, rtx)
20627 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
20629 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
20632 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
20634 ix86_zero_extend_to_Pmode (rtx exp)
20637 if (GET_MODE (exp) == VOIDmode)
20638 return force_reg (Pmode, exp);
20639 if (GET_MODE (exp) == Pmode)
20640 return copy_to_mode_reg (Pmode, exp);
20641 r = gen_reg_rtx (Pmode);
20642 emit_insn (gen_zero_extendsidi2 (r, exp));
20646 /* Divide COUNTREG by SCALE. */
20648 scale_counter (rtx countreg, int scale)
20654 if (CONST_INT_P (countreg))
20655 return GEN_INT (INTVAL (countreg) / scale);
20656 gcc_assert (REG_P (countreg));
20658 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
20659 GEN_INT (exact_log2 (scale)),
20660 NULL, 1, OPTAB_DIRECT);
20664 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
20665 DImode for constant loop counts. */
20667 static enum machine_mode
20668 counter_mode (rtx count_exp)
20670 if (GET_MODE (count_exp) != VOIDmode)
20671 return GET_MODE (count_exp);
21672 if (!CONST_INT_P (count_exp))
21673 return Pmode;
21674 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
21675 return DImode;
21676 return SImode;
20679 /* When SRCPTR is non-NULL, output a simple loop to move memory
20680 pointed to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
20681 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
20682 equivalent loop to set memory to VALUE (supposed to be in MODE).
20684 The size is rounded down to a whole number of chunks moved at once.
20685 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
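/* Illustrative C model of the emitted copy loop, a sketch rather than
   GCC source; PIECE stands for GET_MODE_SIZE (MODE) * UNROLL:  */
#if 0
static void
copy_loop_example (unsigned char *destptr, const unsigned char *srcptr,
                   unsigned long count, unsigned long piece)
{
  unsigned long size = count & ~(piece - 1);	/* round down to chunks */
  unsigned long iter, i;
  for (iter = 0; iter < size; iter += piece)
    for (i = 0; i < piece; i++)
      destptr[iter + i] = srcptr[iter + i];
  /* The real expander then advances DESTPTR/SRCPTR past SIZE bytes.  */
}
#endif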
20689 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
20690 rtx destptr, rtx srcptr, rtx value,
20691 rtx count, enum machine_mode mode, int unroll,
20694 rtx out_label, top_label, iter, tmp;
20695 enum machine_mode iter_mode = counter_mode (count);
20696 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
20697 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
20703 top_label = gen_label_rtx ();
20704 out_label = gen_label_rtx ();
20705 iter = gen_reg_rtx (iter_mode);
20707 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
20708 NULL, 1, OPTAB_DIRECT);
20709 /* Those two should combine. */
20710 if (piece_size == const1_rtx)
20712 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
20714 predict_jump (REG_BR_PROB_BASE * 10 / 100);
20716 emit_move_insn (iter, const0_rtx);
20718 emit_label (top_label);
20720 tmp = convert_modes (Pmode, iter_mode, iter, true);
20721 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
20722 destmem = change_address (destmem, mode, x_addr);
20726 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
20727 srcmem = change_address (srcmem, mode, y_addr);
20729 /* When unrolling for chips that reorder memory reads and writes,
20730 we can save registers by using a single temporary.
20731 Using 4 temporaries is also overkill in 32-bit mode. */
20732 if (!TARGET_64BIT && 0)
20734 for (i = 0; i < unroll; i++)
20739 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20741 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20743 emit_move_insn (destmem, srcmem);
20749 gcc_assert (unroll <= 4);
20750 for (i = 0; i < unroll; i++)
20752 tmpreg[i] = gen_reg_rtx (mode);
20756 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20758 emit_move_insn (tmpreg[i], srcmem);
20760 for (i = 0; i < unroll; i++)
20765 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20767 emit_move_insn (destmem, tmpreg[i]);
20772 for (i = 0; i < unroll; i++)
20776 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20777 emit_move_insn (destmem, value);
20780 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
20781 true, OPTAB_LIB_WIDEN);
20783 emit_move_insn (iter, tmp);
20785 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
20787 if (expected_size != -1)
20789 expected_size /= GET_MODE_SIZE (mode) * unroll;
20790 if (expected_size == 0)
20792 else if (expected_size > REG_BR_PROB_BASE)
20793 predict_jump (REG_BR_PROB_BASE - 1);
20795 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
20798 predict_jump (REG_BR_PROB_BASE * 80 / 100);
20799 iter = ix86_zero_extend_to_Pmode (iter);
20800 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
20801 true, OPTAB_LIB_WIDEN);
20802 if (tmp != destptr)
20803 emit_move_insn (destptr, tmp);
20806 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
20807 true, OPTAB_LIB_WIDEN);
20809 emit_move_insn (srcptr, tmp);
20811 emit_label (out_label);
20814 /* Output "rep; mov" instruction.
20815 Arguments have the same meaning as for the previous function. */
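/* Illustrative sketch, not GCC source: assuming the direction flag is
   clear (forward copy), "rep; movs" with element size SIZE and count
   register CX semantically performs:  */
#if 0
static void
rep_movs_example (unsigned char *di, const unsigned char *si,
                  unsigned long cx, unsigned long size)
{
  while (cx--)
    {
      unsigned long i;
      for (i = 0; i < size; i++)
        di[i] = si[i];
      di += size;	/* both pointers auto-increment */
      si += size;
    }
}
#endif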
20817 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
20818 rtx destptr, rtx srcptr,
20820 enum machine_mode mode)
20825 HOST_WIDE_INT rounded_count;
20827 /* If the size is known, it is shorter to use rep movs. */
20828 if (mode == QImode && CONST_INT_P (count)
20829 && !(INTVAL (count) & 3))
20832 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20833 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20834 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
20835 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
20836 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20837 if (mode != QImode)
20839 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20840 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20841 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20842 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
20843 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20844 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
20848 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20849 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
20851 if (CONST_INT_P (count))
20853 rounded_count = (INTVAL (count)
20854 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20855 destmem = shallow_copy_rtx (destmem);
20856 srcmem = shallow_copy_rtx (srcmem);
20857 set_mem_size (destmem, rounded_count);
20858 set_mem_size (srcmem, rounded_count);
20862 if (MEM_SIZE_KNOWN_P (destmem))
20863 clear_mem_size (destmem);
20864 if (MEM_SIZE_KNOWN_P (srcmem))
20865 clear_mem_size (srcmem);
20867 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
20871 /* Output "rep; stos" instruction.
20872 Arguments have the same meaning as for the previous function. */
20874 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
20875 rtx count, enum machine_mode mode,
20880 HOST_WIDE_INT rounded_count;
20882 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20883 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20884 value = force_reg (mode, gen_lowpart (mode, value));
20885 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20886 if (mode != QImode)
20888 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20889 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20890 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20893 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20894 if (orig_value == const0_rtx && CONST_INT_P (count))
20896 rounded_count = (INTVAL (count)
20897 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20898 destmem = shallow_copy_rtx (destmem);
20899 set_mem_size (destmem, rounded_count);
20901 else if (MEM_SIZE_KNOWN_P (destmem))
20902 clear_mem_size (destmem);
20903 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
20907 emit_strmov (rtx destmem, rtx srcmem,
20908 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
20910 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
20911 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
20912 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20915 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
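/* Worked example, illustrative: for a constant COUNT of 23 with MAX_SIZE
   of 32, the tail of 23 = 16 + 4 + 2 + 1 bytes is copied by a 16-byte
   step (two 8-byte moves on 64-bit targets), then a 4-byte, a 2-byte and
   a 1-byte move, matching the bit tests on COUNTVAL below.  */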
20917 expand_movmem_epilogue (rtx destmem, rtx srcmem,
20918 rtx destptr, rtx srcptr, rtx count, int max_size)
20921 if (CONST_INT_P (count))
20923 HOST_WIDE_INT countval = INTVAL (count);
20926 if ((countval & 0x10) && max_size > 16)
20930 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20931 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
20934 gcc_unreachable ();
20937 if ((countval & 0x08) && max_size > 8)
20940 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20943 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20944 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20948 if ((countval & 0x04) && max_size > 4)
20950 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20953 if ((countval & 0x02) && max_size > 2)
20955 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20958 if ((countval & 0x01) && max_size > 1)
20960 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
20967 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20968 count, 1, OPTAB_DIRECT);
20969 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20970 count, QImode, 1, 4);
20974 /* When there are stringops, we can cheaply increase dest and src pointers.
20975 Otherwise we save code size by maintaining offset (zero is readily
20976 available from preceding rep operation) and using x86 addressing modes.
20978 if (TARGET_SINGLE_STRINGOP)
20982 rtx label = ix86_expand_aligntest (count, 4, true);
20983 src = change_address (srcmem, SImode, srcptr);
20984 dest = change_address (destmem, SImode, destptr);
20985 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20986 emit_label (label);
20987 LABEL_NUSES (label) = 1;
20991 rtx label = ix86_expand_aligntest (count, 2, true);
20992 src = change_address (srcmem, HImode, srcptr);
20993 dest = change_address (destmem, HImode, destptr);
20994 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20995 emit_label (label);
20996 LABEL_NUSES (label) = 1;
21000 rtx label = ix86_expand_aligntest (count, 1, true);
21001 src = change_address (srcmem, QImode, srcptr);
21002 dest = change_address (destmem, QImode, destptr);
21003 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21004 emit_label (label);
21005 LABEL_NUSES (label) = 1;
21010 rtx offset = force_reg (Pmode, const0_rtx);
21015 rtx label = ix86_expand_aligntest (count, 4, true);
21016 src = change_address (srcmem, SImode, srcptr);
21017 dest = change_address (destmem, SImode, destptr);
21018 emit_move_insn (dest, src);
21019 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
21020 true, OPTAB_LIB_WIDEN);
21022 emit_move_insn (offset, tmp);
21023 emit_label (label);
21024 LABEL_NUSES (label) = 1;
21028 rtx label = ix86_expand_aligntest (count, 2, true);
21029 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21030 src = change_address (srcmem, HImode, tmp);
21031 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21032 dest = change_address (destmem, HImode, tmp);
21033 emit_move_insn (dest, src);
21034 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
21035 true, OPTAB_LIB_WIDEN);
21037 emit_move_insn (offset, tmp);
21038 emit_label (label);
21039 LABEL_NUSES (label) = 1;
21043 rtx label = ix86_expand_aligntest (count, 1, true);
21044 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21045 src = change_address (srcmem, QImode, tmp);
21046 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21047 dest = change_address (destmem, QImode, tmp);
21048 emit_move_insn (dest, src);
21049 emit_label (label);
21050 LABEL_NUSES (label) = 1;
21055 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
21057 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
21058 rtx count, int max_size)
21061 expand_simple_binop (counter_mode (count), AND, count,
21062 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
21063 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
21064 gen_lowpart (QImode, value), count, QImode,
21068 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
21070 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
21074 if (CONST_INT_P (count))
21076 HOST_WIDE_INT countval = INTVAL (count);
21079 if ((countval & 0x10) && max_size > 16)
21083 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
21084 emit_insn (gen_strset (destptr, dest, value));
21085 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
21086 emit_insn (gen_strset (destptr, dest, value));
21089 gcc_unreachable ();
21092 if ((countval & 0x08) && max_size > 8)
21096 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
21097 emit_insn (gen_strset (destptr, dest, value));
21101 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
21102 emit_insn (gen_strset (destptr, dest, value));
21103 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
21104 emit_insn (gen_strset (destptr, dest, value));
21108 if ((countval & 0x04) && max_size > 4)
21110 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
21111 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
21114 if ((countval & 0x02) && max_size > 2)
21116 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
21117 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
21120 if ((countval & 0x01) && max_size > 1)
21122 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
21123 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
21130 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
21135 rtx label = ix86_expand_aligntest (count, 16, true);
21138 dest = change_address (destmem, DImode, destptr);
21139 emit_insn (gen_strset (destptr, dest, value));
21140 emit_insn (gen_strset (destptr, dest, value));
21144 dest = change_address (destmem, SImode, destptr);
21145 emit_insn (gen_strset (destptr, dest, value));
21146 emit_insn (gen_strset (destptr, dest, value));
21147 emit_insn (gen_strset (destptr, dest, value));
21148 emit_insn (gen_strset (destptr, dest, value));
21150 emit_label (label);
21151 LABEL_NUSES (label) = 1;
21155 rtx label = ix86_expand_aligntest (count, 8, true);
21158 dest = change_address (destmem, DImode, destptr);
21159 emit_insn (gen_strset (destptr, dest, value));
21163 dest = change_address (destmem, SImode, destptr);
21164 emit_insn (gen_strset (destptr, dest, value));
21165 emit_insn (gen_strset (destptr, dest, value));
21167 emit_label (label);
21168 LABEL_NUSES (label) = 1;
21172 rtx label = ix86_expand_aligntest (count, 4, true);
21173 dest = change_address (destmem, SImode, destptr);
21174 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
21175 emit_label (label);
21176 LABEL_NUSES (label) = 1;
21180 rtx label = ix86_expand_aligntest (count, 2, true);
21181 dest = change_address (destmem, HImode, destptr);
21182 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
21183 emit_label (label);
21184 LABEL_NUSES (label) = 1;
21188 rtx label = ix86_expand_aligntest (count, 1, true);
21189 dest = change_address (destmem, QImode, destptr);
21190 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
21191 emit_label (label);
21192 LABEL_NUSES (label) = 1;
21196 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned to
21197 ALIGN, to DESIRED_ALIGNMENT. */
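/* Illustrative C model of this prologue, a sketch rather than GCC source
   (assumes pointers fit in unsigned long): copy 1, 2, then 4 bytes as
   needed until the destination reaches the desired alignment, adjusting
   the remaining count each time:  */
#if 0
static void
align_prologue_example (unsigned char **destp, const unsigned char **srcp,
                        unsigned long *countp, unsigned long desired_align)
{
  unsigned long chunk;
  for (chunk = 1; chunk < desired_align; chunk <<= 1)
    if ((unsigned long) *destp & chunk)
      {
        unsigned long i;
        for (i = 0; i < chunk; i++)
          (*destp)[i] = (*srcp)[i];
        *destp += chunk;
        *srcp += chunk;
        *countp -= chunk;	/* cf. ix86_adjust_counter above */
      }
}
#endif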
21199 expand_movmem_prologue (rtx destmem, rtx srcmem,
21200 rtx destptr, rtx srcptr, rtx count,
21201 int align, int desired_alignment)
21203 if (align <= 1 && desired_alignment > 1)
21205 rtx label = ix86_expand_aligntest (destptr, 1, false);
21206 srcmem = change_address (srcmem, QImode, srcptr);
21207 destmem = change_address (destmem, QImode, destptr);
21208 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21209 ix86_adjust_counter (count, 1);
21210 emit_label (label);
21211 LABEL_NUSES (label) = 1;
21213 if (align <= 2 && desired_alignment > 2)
21215 rtx label = ix86_expand_aligntest (destptr, 2, false);
21216 srcmem = change_address (srcmem, HImode, srcptr);
21217 destmem = change_address (destmem, HImode, destptr);
21218 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21219 ix86_adjust_counter (count, 2);
21220 emit_label (label);
21221 LABEL_NUSES (label) = 1;
21223 if (align <= 4 && desired_alignment > 4)
21225 rtx label = ix86_expand_aligntest (destptr, 4, false);
21226 srcmem = change_address (srcmem, SImode, srcptr);
21227 destmem = change_address (destmem, SImode, destptr);
21228 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21229 ix86_adjust_counter (count, 4);
21230 emit_label (label);
21231 LABEL_NUSES (label) = 1;
21233 gcc_assert (desired_alignment <= 8);
21236 /* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
21237 ALIGN_BYTES is how many bytes need to be copied. */
21239 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
21240 int desired_align, int align_bytes)
21243 rtx orig_dst = dst;
21244 rtx orig_src = src;
21246 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
21247 if (src_align_bytes >= 0)
21248 src_align_bytes = desired_align - src_align_bytes;
21249 if (align_bytes & 1)
21251 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21252 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
21254 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21256 if (align_bytes & 2)
21258 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21259 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
21260 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21261 set_mem_align (dst, 2 * BITS_PER_UNIT);
21262 if (src_align_bytes >= 0
21263 && (src_align_bytes & 1) == (align_bytes & 1)
21264 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
21265 set_mem_align (src, 2 * BITS_PER_UNIT);
21267 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21269 if (align_bytes & 4)
21271 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21272 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
21273 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21274 set_mem_align (dst, 4 * BITS_PER_UNIT);
21275 if (src_align_bytes >= 0)
21277 unsigned int src_align = 0;
21278 if ((src_align_bytes & 3) == (align_bytes & 3))
21280 else if ((src_align_bytes & 1) == (align_bytes & 1))
21282 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21283 set_mem_align (src, src_align * BITS_PER_UNIT);
21286 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21288 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21289 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
21290 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21291 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21292 if (src_align_bytes >= 0)
21294 unsigned int src_align = 0;
21295 if ((src_align_bytes & 7) == (align_bytes & 7))
21297 else if ((src_align_bytes & 3) == (align_bytes & 3))
21299 else if ((src_align_bytes & 1) == (align_bytes & 1))
21301 if (src_align > (unsigned int) desired_align)
21302 src_align = desired_align;
21303 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21304 set_mem_align (src, src_align * BITS_PER_UNIT);
21306 if (MEM_SIZE_KNOWN_P (orig_dst))
21307 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21308 if (MEM_SIZE_KNOWN_P (orig_src))
21309 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
21314 /* Store enough bytes at DEST to align DEST, known to be aligned to ALIGN,
21315 to DESIRED_ALIGNMENT. */
21317 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
21318 int align, int desired_alignment)
21320 if (align <= 1 && desired_alignment > 1)
21322 rtx label = ix86_expand_aligntest (destptr, 1, false);
21323 destmem = change_address (destmem, QImode, destptr);
21324 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
21325 ix86_adjust_counter (count, 1);
21326 emit_label (label);
21327 LABEL_NUSES (label) = 1;
21329 if (align <= 2 && desired_alignment > 2)
21331 rtx label = ix86_expand_aligntest (destptr, 2, false);
21332 destmem = change_address (destmem, HImode, destptr);
21333 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
21334 ix86_adjust_counter (count, 2);
21335 emit_label (label);
21336 LABEL_NUSES (label) = 1;
21338 if (align <= 4 && desired_alignment > 4)
21340 rtx label = ix86_expand_aligntest (destptr, 4, false);
21341 destmem = change_address (destmem, SImode, destptr);
21342 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
21343 ix86_adjust_counter (count, 4);
21344 emit_label (label);
21345 LABEL_NUSES (label) = 1;
21347 gcc_assert (desired_alignment <= 8);
21350 /* Store enough bytes at DST to align DST, known to be aligned to ALIGN,
21351 to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
21353 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
21354 int desired_align, int align_bytes)
21357 rtx orig_dst = dst;
21358 if (align_bytes & 1)
21360 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21362 emit_insn (gen_strset (destreg, dst,
21363 gen_lowpart (QImode, value)));
21365 if (align_bytes & 2)
21367 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21368 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21369 set_mem_align (dst, 2 * BITS_PER_UNIT);
21371 emit_insn (gen_strset (destreg, dst,
21372 gen_lowpart (HImode, value)));
21374 if (align_bytes & 4)
21376 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21377 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21378 set_mem_align (dst, 4 * BITS_PER_UNIT);
21380 emit_insn (gen_strset (destreg, dst,
21381 gen_lowpart (SImode, value)));
21383 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21384 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21385 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21386 if (MEM_SIZE_KNOWN_P (orig_dst))
21387 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21391 /* Given COUNT and EXPECTED_SIZE, decide on the codegen of the string operation. */
21392 static enum stringop_alg
21393 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
21394 int *dynamic_check)
21396 const struct stringop_algs * algs;
21397 bool optimize_for_speed;
21398 /* Algorithms using the rep prefix want at least edi and ecx;
21399 additionally, memset wants eax and memcpy wants esi. Don't
21400 consider such algorithms if the user has appropriated those
21401 registers for their own purposes. */
21402 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
21404 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
21406 #define ALG_USABLE_P(alg) (rep_prefix_usable \
21407 || (alg != rep_prefix_1_byte \
21408 && alg != rep_prefix_4_byte \
21409 && alg != rep_prefix_8_byte))
21410 const struct processor_costs *cost;
21412 /* Even if the string operation call is cold, we still might spend a lot
21413 of time processing large blocks. */
21414 if (optimize_function_for_size_p (cfun)
21415 || (optimize_insn_for_size_p ()
21416 && expected_size != -1 && expected_size < 256))
21417 optimize_for_speed = false;
21419 optimize_for_speed = true;
21421 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
21423 *dynamic_check = -1;
21425 algs = &cost->memset[TARGET_64BIT != 0];
21427 algs = &cost->memcpy[TARGET_64BIT != 0];
21428 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
21429 return ix86_stringop_alg;
21430 /* rep; movq or rep; movl is the smallest variant. */
21431 else if (!optimize_for_speed)
21433 if (!count || (count & 3))
21434 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
21436 return rep_prefix_usable ? rep_prefix_4_byte : loop;
21438 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
21440 else if (expected_size != -1 && expected_size < 4)
21441 return loop_1_byte;
21442 else if (expected_size != -1)
21445 enum stringop_alg alg = libcall;
21446 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
21448 /* We get here if the algorithms that were not libcall-based
21449 were rep-prefix based and we are unable to use rep prefixes
21450 based on global register usage. Break out of the loop and
21451 use the heuristic below. */
21452 if (algs->size[i].max == 0)
21454 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
21456 enum stringop_alg candidate = algs->size[i].alg;
21458 if (candidate != libcall && ALG_USABLE_P (candidate))
21460 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
21461 last non-libcall inline algorithm. */
21462 if (TARGET_INLINE_ALL_STRINGOPS)
21464 /* When the current size is best copied by a libcall,
21465 but we are still forced to inline, run the heuristic below
21466 that will pick code for medium-sized blocks. */
21467 if (alg != libcall)
21471 else if (ALG_USABLE_P (candidate))
21475 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
21477 /* When asked to inline the call anyway, try to pick a meaningful choice.
21478 We look for the maximal size of block that is faster to copy by hand and
21479 take blocks of at most that size, guessing that the average size will
21480 be roughly half of the maximum.
21482 If this turns out to be bad, we might simply specify the preferred
21483 choice in ix86_costs. */
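/* Worked example, illustrative: if the largest inline-friendly block in
   the cost table is 4096 bytes, we recurse with an expected size of 2048
   to pick the algorithm for the average case, and with
   -minline-stringops-dynamically we then set *DYNAMIC_CHECK to 4096 so
   larger blocks take the libcall at run time.  */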
21484 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21485 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
21488 enum stringop_alg alg;
21490 bool any_alg_usable_p = true;
21492 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
21494 enum stringop_alg candidate = algs->size[i].alg;
21495 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
21497 if (candidate != libcall && candidate
21498 && ALG_USABLE_P (candidate))
21499 max = algs->size[i].max;
21501 /* If there aren't any usable algorithms, then recursing on
21502 smaller sizes isn't going to find anything. Just return the
21503 simple byte-at-a-time copy loop. */
21504 if (!any_alg_usable_p)
21506 /* Pick something reasonable. */
21507 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21508 *dynamic_check = 128;
21509 return loop_1_byte;
21513 alg = decide_alg (count, max / 2, memset, dynamic_check);
21514 gcc_assert (*dynamic_check == -1);
21515 gcc_assert (alg != libcall);
21516 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21517 *dynamic_check = max;
21520 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
21521 #undef ALG_USABLE_P
21524 /* Decide on alignment. We know that the operand is already aligned to ALIGN
21525 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
21527 decide_alignment (int align,
21528 enum stringop_alg alg,
21531 int desired_align = 0;
21535 gcc_unreachable ();
21537 case unrolled_loop:
21538 desired_align = GET_MODE_SIZE (Pmode);
21540 case rep_prefix_8_byte:
21543 case rep_prefix_4_byte:
21544 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
21545 copying a whole cache line at once. */
21546 if (TARGET_PENTIUMPRO)
21551 case rep_prefix_1_byte:
21552 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
21553 copying a whole cache line at once. */
21554 if (TARGET_PENTIUMPRO)
21568 if (desired_align < align)
21569 desired_align = align;
21570 if (expected_size != -1 && expected_size < 4)
21571 desired_align = align;
21572 return desired_align;
21575 /* Return the smallest power of 2 greater than VAL. */
21577 smallest_pow2_greater_than (int val)
21585 /* Expand string move (memcpy) operation. Use i386 string operations
21586 when profitable. expand_setmem contains similar code. The code
21587 depends upon architecture, block size and alignment, but always has
21588 the same overall structure:
21590 1) Prologue guard: Conditional that jumps up to epilogues for small
21591 blocks that can be handled by epilogue alone. This is faster
21592 but also needed for correctness, since the prologue assumes the block
21593 is larger than the desired alignment.
21595 Optional dynamic check for size and libcall for large
21596 blocks is emitted here too, with -minline-stringops-dynamically.
21598 2) Prologue: copy the first few bytes in order to get the destination
21599 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
21600 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
21601 copied. We emit either a jump tree for power-of-two sized
21602 blocks, or a byte loop.
21604 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
21605 with specified algorithm.
21607 4) Epilogue: code copying the tail of the block that is too small to be
21608 handled by the main body (or up to the size guarded by the prologue guard). */
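/* Illustrative C-level shape of the code emitted by this function, a
   sketch rather than GCC source; CHUNK stands for SIZE_NEEDED and is
   assumed to be a power of 2:  */
#if 0
static void
movmem_shape_example (unsigned char *dst, const unsigned char *src,
                      unsigned long count, unsigned long chunk)
{
  unsigned long main_size = count & ~(chunk - 1);
  unsigned long i = 0;
  if (count >= chunk)			/* 1) prologue guard */
    {
      /* 2) the alignment prologue would run here */
      for (; i < main_size; i++)	/* 3) main body (loop or rep) */
        dst[i] = src[i];
    }
  for (; i < count; i++)		/* 4) epilogue: tail bytes */
    dst[i] = src[i];
}
#endif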
21611 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
21612 rtx expected_align_exp, rtx expected_size_exp)
21618 rtx jump_around_label = NULL;
21619 HOST_WIDE_INT align = 1;
21620 unsigned HOST_WIDE_INT count = 0;
21621 HOST_WIDE_INT expected_size = -1;
21622 int size_needed = 0, epilogue_size_needed;
21623 int desired_align = 0, align_bytes = 0;
21624 enum stringop_alg alg;
21626 bool need_zero_guard = false;
21628 if (CONST_INT_P (align_exp))
21629 align = INTVAL (align_exp);
21630 /* i386 can do misaligned access at a reasonably increased cost. */
21631 if (CONST_INT_P (expected_align_exp)
21632 && INTVAL (expected_align_exp) > align)
21633 align = INTVAL (expected_align_exp);
21634 /* ALIGN is the minimum of destination and source alignment, but we care here
21635 just about destination alignment. */
21636 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
21637 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
21639 if (CONST_INT_P (count_exp))
21640 count = expected_size = INTVAL (count_exp);
21641 if (CONST_INT_P (expected_size_exp) && count == 0)
21642 expected_size = INTVAL (expected_size_exp);
21644 /* Make sure we don't need to care about overflow later on. */
21645 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21648 /* Step 0: Decide on preferred algorithm, desired alignment and
21649 size of chunks to be copied by main loop. */
21651 alg = decide_alg (count, expected_size, false, &dynamic_check);
21652 desired_align = decide_alignment (align, alg, expected_size);
21654 if (!TARGET_ALIGN_STRINGOPS)
21655 align = desired_align;
21657 if (alg == libcall)
21659 gcc_assert (alg != no_stringop);
21661 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
21662 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21663 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
21668 gcc_unreachable ();
21670 need_zero_guard = true;
21671 size_needed = GET_MODE_SIZE (Pmode);
21673 case unrolled_loop:
21674 need_zero_guard = true;
21675 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
21677 case rep_prefix_8_byte:
21680 case rep_prefix_4_byte:
21683 case rep_prefix_1_byte:
21687 need_zero_guard = true;
21692 epilogue_size_needed = size_needed;
21694 /* Step 1: Prologue guard. */
21696 /* Alignment code needs count to be in register. */
21697 if (CONST_INT_P (count_exp) && desired_align > align)
21699 if (INTVAL (count_exp) > desired_align
21700 && INTVAL (count_exp) > size_needed)
21703 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21704 if (align_bytes <= 0)
21707 align_bytes = desired_align - align_bytes;
21709 if (align_bytes == 0)
21710 count_exp = force_reg (counter_mode (count_exp), count_exp);
21712 gcc_assert (desired_align >= 1 && align >= 1);
21714 /* Ensure that alignment prologue won't copy past end of block. */
21715 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21717 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21718 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21719 Make sure it is a power of 2. */
21720 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21724 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21726 /* If the main algorithm works on QImode, no epilogue is needed.
21727 For small sizes just don't align anything. */
21728 if (size_needed == 1)
21729 desired_align = align;
21736 label = gen_label_rtx ();
21737 emit_cmp_and_jump_insns (count_exp,
21738 GEN_INT (epilogue_size_needed),
21739 LTU, 0, counter_mode (count_exp), 1, label);
21740 if (expected_size == -1 || expected_size < epilogue_size_needed)
21741 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21743 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21747 /* Emit code to decide at runtime whether a library call or inline code should be used. */
21749 if (dynamic_check != -1)
21751 if (CONST_INT_P (count_exp))
21753 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
21755 emit_block_move_via_libcall (dst, src, count_exp, false);
21756 count_exp = const0_rtx;
21762 rtx hot_label = gen_label_rtx ();
21763 jump_around_label = gen_label_rtx ();
21764 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21765 LEU, 0, GET_MODE (count_exp), 1, hot_label);
21766 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21767 emit_block_move_via_libcall (dst, src, count_exp, false);
21768 emit_jump (jump_around_label);
21769 emit_label (hot_label);
21773 /* Step 2: Alignment prologue. */
21775 if (desired_align > align)
21777 if (align_bytes == 0)
21779 /* Except for the first move in the epilogue, we no longer know
21780 the constant offset in the aliasing info. It doesn't seem worth
21781 the pain to maintain it for the first move, so throw away the info early. */
21783 src = change_address (src, BLKmode, srcreg);
21784 dst = change_address (dst, BLKmode, destreg);
21785 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
21790 /* If we know how many bytes need to be stored before dst is
21791 sufficiently aligned, maintain aliasing info accurately. */
21792 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
21793 desired_align, align_bytes);
21794 count_exp = plus_constant (count_exp, -align_bytes);
21795 count -= align_bytes;
21797 if (need_zero_guard
21798 && (count < (unsigned HOST_WIDE_INT) size_needed
21799 || (align_bytes == 0
21800 && count < ((unsigned HOST_WIDE_INT) size_needed
21801 + desired_align - align))))
21803 /* It is possible that we copied enough so that the main loop will not operate. */
21805 gcc_assert (size_needed > 1);
21806 if (label == NULL_RTX)
21807 label = gen_label_rtx ();
21808 emit_cmp_and_jump_insns (count_exp,
21809 GEN_INT (size_needed),
21810 LTU, 0, counter_mode (count_exp), 1, label);
21811 if (expected_size == -1
21812 || expected_size < (desired_align - align) / 2 + size_needed)
21813 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21815 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21818 if (label && size_needed == 1)
21820 emit_label (label);
21821 LABEL_NUSES (label) = 1;
21823 epilogue_size_needed = 1;
21825 else if (label == NULL_RTX)
21826 epilogue_size_needed = size_needed;
21828 /* Step 3: Main loop. */
21834 gcc_unreachable ();
21836 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21837 count_exp, QImode, 1, expected_size);
21840 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21841 count_exp, Pmode, 1, expected_size);
21843 case unrolled_loop:
21844 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
21845 registers for 4 temporaries anyway. */
21846 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21847 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
21850 case rep_prefix_8_byte:
21851 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21854 case rep_prefix_4_byte:
21855 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21858 case rep_prefix_1_byte:
21859 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21863 /* Properly adjust the offsets of src and dest memory for aliasing. */
21864 if (CONST_INT_P (count_exp))
21866 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
21867 (count / size_needed) * size_needed);
21868 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21869 (count / size_needed) * size_needed);
21873 src = change_address (src, BLKmode, srcreg);
21874 dst = change_address (dst, BLKmode, destreg);
21877 /* Step 4: Epilogue to copy the remaining bytes. */
21881 /* When the main loop is done, COUNT_EXP might hold the original count,
21882 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21883 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21884 bytes. Compensate if needed. */
21886 if (size_needed < epilogue_size_needed)
21889 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21890 GEN_INT (size_needed - 1), count_exp, 1,
21892 if (tmp != count_exp)
21893 emit_move_insn (count_exp, tmp);
21895 emit_label (label);
21896 LABEL_NUSES (label) = 1;
21899 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21900 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
21901 epilogue_size_needed);
21902 if (jump_around_label)
21903 emit_label (jump_around_label);
21907 /* Helper function for memset. For a QImode value 0xXY, produce
21908 0xXYXYXYXY of the width specified by MODE. This is essentially
21909 a multiply by 0x01010101, but we can do slightly better than
21910 synth_mult by unwinding the sequence by hand on CPUs with slow multiply. */
21913 promote_duplicated_reg (enum machine_mode mode, rtx val)
21915 enum machine_mode valmode = GET_MODE (val);
21917 int nops = mode == DImode ? 3 : 2;
21919 gcc_assert (mode == SImode || mode == DImode);
21920 if (val == const0_rtx)
21921 return copy_to_mode_reg (mode, const0_rtx);
21922 if (CONST_INT_P (val))
21924 HOST_WIDE_INT v = INTVAL (val) & 255;
21926 v |= v << 8;
21927 v |= v << 16;
21928 if (mode == DImode)
21929 v |= (v << 16) << 16;
21930 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
21933 if (valmode == VOIDmode)
21935 if (valmode != QImode)
21936 val = gen_lowpart (QImode, val);
21937 if (mode == QImode)
21939 if (!TARGET_PARTIAL_REG_STALL)
21941 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
21942 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21943 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21944 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21946 rtx reg = convert_modes (mode, QImode, val, true);
21947 tmp = promote_duplicated_reg (mode, const1_rtx);
21948 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21953 rtx reg = convert_modes (mode, QImode, val, true);
21955 if (!TARGET_PARTIAL_REG_STALL)
21956 if (mode == SImode)
21957 emit_insn (gen_movsi_insv_1 (reg, reg));
21959 emit_insn (gen_movdi_insv_1 (reg, reg));
21962 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21963 NULL, 1, OPTAB_DIRECT);
21965 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21967 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21968 NULL, 1, OPTAB_DIRECT);
21969 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21970 if (mode == SImode)
21971 return reg;
21972 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21973 NULL, 1, OPTAB_DIRECT);
21974 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
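/* Illustrative sketch, not GCC source: the shift/IOR sequence above
   computes, for a byte 0xXY, the SImode value 0xXYXYXYXY:  */
#if 0
static unsigned int
promote_byte_example (unsigned char b)
{
  unsigned int v = b;	/* 0x000000XY */
  v |= v << 8;		/* 0x0000XYXY */
  v |= v << 16;		/* 0xXYXYXYXY */
  return v;
}
#endif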
21979 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size that will
21980 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
21981 alignment from ALIGN to DESIRED_ALIGN. */
21983 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
21988 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21989 promoted_val = promote_duplicated_reg (DImode, val);
21990 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21991 promoted_val = promote_duplicated_reg (SImode, val);
21992 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21993 promoted_val = promote_duplicated_reg (HImode, val);
21995 promoted_val = val;
21997 return promoted_val;
22000 /* Expand string set operation (memset). Use i386 string operations when
22001 profitable. See the expand_movmem comment for an explanation of the individual
22002 steps performed. */
22004 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
22005 rtx expected_align_exp, rtx expected_size_exp)
22010 rtx jump_around_label = NULL;
22011 HOST_WIDE_INT align = 1;
22012 unsigned HOST_WIDE_INT count = 0;
22013 HOST_WIDE_INT expected_size = -1;
22014 int size_needed = 0, epilogue_size_needed;
22015 int desired_align = 0, align_bytes = 0;
22016 enum stringop_alg alg;
22017 rtx promoted_val = NULL;
22018 bool force_loopy_epilogue = false;
22020 bool need_zero_guard = false;
22022 if (CONST_INT_P (align_exp))
22023 align = INTVAL (align_exp);
22024 /* i386 can do misaligned access at a reasonably increased cost. */
22025 if (CONST_INT_P (expected_align_exp)
22026 && INTVAL (expected_align_exp) > align)
22027 align = INTVAL (expected_align_exp);
22028 if (CONST_INT_P (count_exp))
22029 count = expected_size = INTVAL (count_exp);
22030 if (CONST_INT_P (expected_size_exp) && count == 0)
22031 expected_size = INTVAL (expected_size_exp);
22033 /* Make sure we don't need to care about overflow later on. */
22034 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
22037 /* Step 0: Decide on preferred algorithm, desired alignment and
22038 size of chunks to be copied by main loop. */
22040 alg = decide_alg (count, expected_size, true, &dynamic_check);
22041 desired_align = decide_alignment (align, alg, expected_size);
22043 if (!TARGET_ALIGN_STRINGOPS)
22044 align = desired_align;
22046 if (alg == libcall)
22048 gcc_assert (alg != no_stringop);
22050 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
22051 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
22056 gcc_unreachable ();
22058 need_zero_guard = true;
22059 size_needed = GET_MODE_SIZE (Pmode);
22061 case unrolled_loop:
22062 need_zero_guard = true;
22063 size_needed = GET_MODE_SIZE (Pmode) * 4;
22065 case rep_prefix_8_byte:
22068 case rep_prefix_4_byte:
22071 case rep_prefix_1_byte:
22075 need_zero_guard = true;
22079 epilogue_size_needed = size_needed;
22081 /* Step 1: Prologue guard. */
22083 /* Alignment code needs count to be in register. */
22084 if (CONST_INT_P (count_exp) && desired_align > align)
22086 if (INTVAL (count_exp) > desired_align
22087 && INTVAL (count_exp) > size_needed)
22090 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22091 if (align_bytes <= 0)
22094 align_bytes = desired_align - align_bytes;
22096 if (align_bytes == 0)
22098 enum machine_mode mode = SImode;
22099 if (TARGET_64BIT && (count & ~0xffffffff))
22101 count_exp = force_reg (mode, count_exp);
22104   /* Do the cheap promotion to allow better CSE across the
22105      main loop and epilogue (i.e. one load of the big constant in
22106      front of all code).  */
22107 if (CONST_INT_P (val_exp))
22108 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
22109 desired_align, align);
22110 /* Ensure that alignment prologue won't copy past end of block. */
22111 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22113 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
22114 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
22115 Make sure it is power of 2. */
22116 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
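  /* For example, with size_needed == 16 and desired_align - align == 12,
     the maximum above is 15 and the smallest power of two greater than
     it is 16, so the epilogue handles count & 15 trailing bytes.  */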
22118   /* To improve performance of small blocks, we jump around the VAL
22119      promoting mode.  This means that if the promoted VAL is not constant,
22120      we might not use it in the epilogue and have to use the byte loop variant.  */
22122 if (epilogue_size_needed > 2 && !promoted_val)
22123 force_loopy_epilogue = true;
22126 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
22128 /* If main algorithm works on QImode, no epilogue is needed.
22129 For small sizes just don't align anything. */
22130 if (size_needed == 1)
22131 desired_align = align;
22138 label = gen_label_rtx ();
22139 emit_cmp_and_jump_insns (count_exp,
22140 GEN_INT (epilogue_size_needed),
22141 LTU, 0, counter_mode (count_exp), 1, label);
22142 if (expected_size == -1 || expected_size <= epilogue_size_needed)
22143 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22145 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22148 if (dynamic_check != -1)
22150 rtx hot_label = gen_label_rtx ();
22151 jump_around_label = gen_label_rtx ();
22152 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22153 LEU, 0, counter_mode (count_exp), 1, hot_label);
22154 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22155 set_storage_via_libcall (dst, count_exp, val_exp, false);
22156 emit_jump (jump_around_label);
22157 emit_label (hot_label);
22160 /* Step 2: Alignment prologue. */
22162   /* Do the expensive promotion once we have branched off the small blocks.  */
22164 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
22165 desired_align, align);
22166 gcc_assert (desired_align >= 1 && align >= 1);
22168 if (desired_align > align)
22170 if (align_bytes == 0)
22172 	  /* Except for the first move in the epilogue, we no longer know
22173 	     the constant offset in aliasing info.  It doesn't seem worth
22174 	     the pain to maintain it for the first move, so throw away the info early.  */
22176 dst = change_address (dst, BLKmode, destreg);
22177 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
22182 /* If we know how many bytes need to be stored before dst is
22183 sufficiently aligned, maintain aliasing info accurately. */
22184 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
22185 desired_align, align_bytes);
22186 count_exp = plus_constant (count_exp, -align_bytes);
22187 count -= align_bytes;
22189 if (need_zero_guard
22190 && (count < (unsigned HOST_WIDE_INT) size_needed
22191 || (align_bytes == 0
22192 && count < ((unsigned HOST_WIDE_INT) size_needed
22193 + desired_align - align))))
22195 	  /* It is possible that we copied enough so the main loop will not execute.  */
22197 gcc_assert (size_needed > 1);
22198 if (label == NULL_RTX)
22199 label = gen_label_rtx ();
22200 emit_cmp_and_jump_insns (count_exp,
22201 GEN_INT (size_needed),
22202 LTU, 0, counter_mode (count_exp), 1, label);
22203 if (expected_size == -1
22204 || expected_size < (desired_align - align) / 2 + size_needed)
22205 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22207 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22210 if (label && size_needed == 1)
22212 emit_label (label);
22213 LABEL_NUSES (label) = 1;
22215 promoted_val = val_exp;
22216 epilogue_size_needed = 1;
22218 else if (label == NULL_RTX)
22219 epilogue_size_needed = size_needed;
22221 /* Step 3: Main loop. */
22227 gcc_unreachable ();
22229 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22230 count_exp, QImode, 1, expected_size);
22233 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22234 count_exp, Pmode, 1, expected_size);
22236 case unrolled_loop:
22237 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22238 count_exp, Pmode, 4, expected_size);
22240 case rep_prefix_8_byte:
22241 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22244 case rep_prefix_4_byte:
22245 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22248 case rep_prefix_1_byte:
22249 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22253   /* Properly adjust the offset of src and dest memory for aliasing.  */
22254 if (CONST_INT_P (count_exp))
22255 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
22256 (count / size_needed) * size_needed);
22258 dst = change_address (dst, BLKmode, destreg);
22260 /* Step 4: Epilogue to copy the remaining bytes. */
22264   /* When the main loop is done, COUNT_EXP might hold the original count,
22265      while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
22266      Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
22267      bytes.  Compensate if needed.  */
22269 if (size_needed < epilogue_size_needed)
22272 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22273 GEN_INT (size_needed - 1), count_exp, 1,
22275 if (tmp != count_exp)
22276 emit_move_insn (count_exp, tmp);
22278 emit_label (label);
22279 LABEL_NUSES (label) = 1;
22282 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22284 if (force_loopy_epilogue)
22285 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
22286 epilogue_size_needed);
22288 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
22289 epilogue_size_needed);
22291 if (jump_around_label)
22292 emit_label (jump_around_label);
22296 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb.
22299 out = result, initialized with the start address
22300 align_rtx = alignment of the address.
22301    scratch = scratch register, initialized with the start address when
22302 	not aligned, otherwise undefined
22304 This is just the body. It needs the initializations mentioned above and
22305 some address computing at the end. These things are done in i386.md. */
22308 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
22312 rtx align_2_label = NULL_RTX;
22313 rtx align_3_label = NULL_RTX;
22314 rtx align_4_label = gen_label_rtx ();
22315 rtx end_0_label = gen_label_rtx ();
22317 rtx tmpreg = gen_reg_rtx (SImode);
22318 rtx scratch = gen_reg_rtx (SImode);
22322 if (CONST_INT_P (align_rtx))
22323 align = INTVAL (align_rtx);
22325 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
22327 /* Is there a known alignment and is it less than 4? */
22330 rtx scratch1 = gen_reg_rtx (Pmode);
22331 emit_move_insn (scratch1, out);
22332 /* Is there a known alignment and is it not 2? */
22335 	  align_3_label = gen_label_rtx (); /* Label when addr & 3 == 3.  */
22336 	  align_2_label = gen_label_rtx (); /* Label when addr & 3 == 2.  */
22338 /* Leave just the 3 lower bits. */
22339 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
22340 NULL_RTX, 0, OPTAB_WIDEN);
22342 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22343 Pmode, 1, align_4_label);
22344 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
22345 Pmode, 1, align_2_label);
22346 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
22347 Pmode, 1, align_3_label);
22351       /* Since the alignment is 2, we have to check 2 or 0 bytes;
22352 	 check whether it is aligned to a 4-byte boundary.  */
22354 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
22355 NULL_RTX, 0, OPTAB_WIDEN);
22357 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22358 Pmode, 1, align_4_label);
22361 mem = change_address (src, QImode, out);
22363 /* Now compare the bytes. */
22365   /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
22366 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
22367 QImode, 1, end_0_label);
22369 /* Increment the address. */
22370 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22372   /* Not needed with an alignment of 2.  */
22375 emit_label (align_2_label);
22377 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22380 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22382 emit_label (align_3_label);
22385 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22388 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22391   /* Generate loop to check 4 bytes at a time.  It is not a good idea to
22392      align this loop; it only enlarges the code and does not help it converge.  */
22394 emit_label (align_4_label);
22396 mem = change_address (src, SImode, out);
22397 emit_move_insn (scratch, mem);
22398 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
22400   /* This formula yields a nonzero result iff one of the bytes is zero.
22401      This saves three branches inside the loop and many cycles.  */
22403 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
22404 emit_insn (gen_one_cmplsi2 (scratch, scratch));
22405 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
22406 emit_insn (gen_andsi3 (tmpreg, tmpreg,
22407 gen_int_mode (0x80808080, SImode)));
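  /* A standalone sketch of the zero-byte test emitted above, assuming
     32-bit unsigned arithmetic:

	 mask = (x - 0x01010101) & ~x & 0x80808080;

     For x == 0x41410041: x - 0x01010101 == 0x403FFF40, ~x == 0xBEBEFFBE,
     and the conjunction leaves 0x00008000, flagging the zero byte, while
     an all-nonzero word such as 0x41414141 yields 0.  */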
22408 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
22413 rtx reg = gen_reg_rtx (SImode);
22414 rtx reg2 = gen_reg_rtx (Pmode);
22415 emit_move_insn (reg, tmpreg);
22416 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
22418 /* If zero is not in the first two bytes, move two bytes forward. */
22419 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22420 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22421 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22422 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
22423 gen_rtx_IF_THEN_ELSE (SImode, tmp,
22426       /* Emit lea manually to avoid clobbering the flags.  */
22427 emit_insn (gen_rtx_SET (SImode, reg2,
22428 gen_rtx_PLUS (Pmode, out, const2_rtx)));
22430 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22431 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22432 emit_insn (gen_rtx_SET (VOIDmode, out,
22433 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
22439 rtx end_2_label = gen_label_rtx ();
22440 /* Is zero in the first two bytes? */
22442 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22443 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22444 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
22445 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22446 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
22448 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
22449 JUMP_LABEL (tmp) = end_2_label;
22451 /* Not in the first two. Move two bytes forward. */
22452 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
22453 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
22455 emit_label (end_2_label);
22459   /* Avoid a branch in the final byte fix-up.  */
22460 tmpreg = gen_lowpart (QImode, tmpreg);
22461 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
22462 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
22463 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
22464 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
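  /* A sketch of why this fix-up is branch-free: at this point OUT is
     four bytes past the word just loaded (six if we stepped over the
     first half), and bit 7 of the low byte of TMPREG says whether the
     zero is the earlier byte of the remaining pair.  addqi3_cc doubles
     TMPREG, copying that bit into the carry flag, and the sbb computes

	 out = out - 3 - carry;

     e.g. for a word at address A with its zero at A + 1, out == A + 4
     and carry == 0, giving A + 1; a zero at A sets carry, giving A.  */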
22466 emit_label (end_0_label);
22469 /* Expand strlen. */
22472 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
22474 rtx addr, scratch1, scratch2, scratch3, scratch4;
22476   /* The generic case of the strlen expander is long.  Avoid
22477      expanding it unless TARGET_INLINE_ALL_STRINGOPS.  */
22479 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
22480 && !TARGET_INLINE_ALL_STRINGOPS
22481 && !optimize_insn_for_size_p ()
22482 && (!CONST_INT_P (align) || INTVAL (align) < 4))
22485 addr = force_reg (Pmode, XEXP (src, 0));
22486 scratch1 = gen_reg_rtx (Pmode);
22488 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
22489 && !optimize_insn_for_size_p ())
22491       /* Well, it seems that some optimizer does not combine a call like
22492 	 foo(strlen(bar), strlen(bar));
22493 	 when the move and the subtraction are done here.  It does calculate
22494 	 the length just once when these instructions are done inside of
22495 	 output_strlen_unroll().  But since &bar[strlen(bar)] is
22496 	 often used and this uses one fewer register for the lifetime of
22497 	 output_strlen_unroll(), this is better.  */
22499 emit_move_insn (out, addr);
22501 ix86_expand_strlensi_unroll_1 (out, src, align);
22503 /* strlensi_unroll_1 returns the address of the zero at the end of
22504 the string, like memchr(), so compute the length by subtracting
22505 the start address. */
22506 emit_insn (ix86_gen_sub3 (out, out, addr));
22512 /* Can't use this if the user has appropriated eax, ecx, or edi. */
22513 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
22516 scratch2 = gen_reg_rtx (Pmode);
22517 scratch3 = gen_reg_rtx (Pmode);
22518 scratch4 = force_reg (Pmode, constm1_rtx);
22520 emit_move_insn (scratch3, addr);
22521 eoschar = force_reg (QImode, eoschar);
22523 src = replace_equiv_address_nv (src, scratch3);
22525 /* If .md starts supporting :P, this can be done in .md. */
22526 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
22527 scratch4), UNSPEC_SCAS);
22528 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
22529 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
22530 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
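  /* The arithmetic above recovers the length from the scan counter:
     repnz scasb starts the count register at -1 (scratch4) and
     decrements it once per byte scanned, including the terminating
     zero, so it ends at -(len + 2).  One's complement gives len + 1,
     and adding -1 leaves len in OUT.  */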
22535 /* For a given symbol (function), construct code to compute the address of
22536    its PLT entry in the large x86-64 PIC model.  */
22538 construct_plt_address (rtx symbol)
22540 rtx tmp = gen_reg_rtx (Pmode);
22541 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
22543 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
22544 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
22546 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
22547 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
22552 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
22554 rtx pop, bool sibcall)
22556   /* We need to represent that SI and DI registers are clobbered by SYSV calls.  */
22558 static int clobbered_registers[] = {
22559 XMM6_REG, XMM7_REG, XMM8_REG,
22560 XMM9_REG, XMM10_REG, XMM11_REG,
22561 XMM12_REG, XMM13_REG, XMM14_REG,
22562 XMM15_REG, SI_REG, DI_REG
22564 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
22565 rtx use = NULL, call;
22566 unsigned int vec_len;
22568 if (pop == const0_rtx)
22570 gcc_assert (!TARGET_64BIT || !pop);
22572 if (TARGET_MACHO && !TARGET_64BIT)
22575 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
22576 fnaddr = machopic_indirect_call_target (fnaddr);
22581 /* Static functions and indirect calls don't need the pic register. */
22582 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
22583 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
22584 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
22585 use_reg (&use, pic_offset_table_rtx);
22588 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
22590 rtx al = gen_rtx_REG (QImode, AX_REG);
22591 emit_move_insn (al, callarg2);
22592 use_reg (&use, al);
22595 if (ix86_cmodel == CM_LARGE_PIC
22597 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
22598 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
22599 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
22601 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
22602 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
22604 fnaddr = XEXP (fnaddr, 0);
22605 if (GET_MODE (fnaddr) != Pmode)
22606 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
22607 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
22611 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
22613 call = gen_rtx_SET (VOIDmode, retval, call);
22614 vec[vec_len++] = call;
22618 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
22619 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
22620 vec[vec_len++] = pop;
22623 if (TARGET_64BIT_MS_ABI
22624 && (!callarg2 || INTVAL (callarg2) != -2))
22628 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
22629 UNSPEC_MS_TO_SYSV_CALL);
22631 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
22633 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
22635 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
22637 clobbered_registers[i]));
22640 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
22641 if (TARGET_VZEROUPPER)
22644 if (cfun->machine->callee_pass_avx256_p)
22646 if (cfun->machine->callee_return_avx256_p)
22647 avx256 = callee_return_pass_avx256;
22649 avx256 = callee_pass_avx256;
22651 else if (cfun->machine->callee_return_avx256_p)
22652 avx256 = callee_return_avx256;
22654 avx256 = call_no_avx256;
22656 if (reload_completed)
22657 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
22659 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
22660 gen_rtvec (1, GEN_INT (avx256)),
22661 UNSPEC_CALL_NEEDS_VZEROUPPER);
22665 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
22666 call = emit_call_insn (call);
22668 CALL_INSN_FUNCTION_USAGE (call) = use;
22674 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
22676 rtx pat = PATTERN (insn);
22677 rtvec vec = XVEC (pat, 0);
22678 int len = GET_NUM_ELEM (vec) - 1;
22680 /* Strip off the last entry of the parallel. */
22681 gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
22682 gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
22684 pat = RTVEC_ELT (vec, 0);
22686 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
22688 emit_insn (gen_avx_vzeroupper (vzeroupper));
22689 emit_call_insn (pat);
22692 /* Output the assembly for a call instruction. */
22695 ix86_output_call_insn (rtx insn, rtx call_op)
22697 bool direct_p = constant_call_address_operand (call_op, Pmode);
22698 bool seh_nop_p = false;
22701 if (SIBLING_CALL_P (insn))
22705 /* SEH epilogue detection requires the indirect branch case
22706 to include REX.W. */
22707 else if (TARGET_SEH)
22708 xasm = "rex.W jmp %A0";
22712 output_asm_insn (xasm, &call_op);
22716 /* SEH unwinding can require an extra nop to be emitted in several
22717 circumstances. Determine if we have one of those. */
22722 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
22724 /* If we get to another real insn, we don't need the nop. */
22728 	  /* If we get to the epilogue note, prevent a catch region from
22729 	     being adjacent to the standard epilogue sequence.  With
22730 	     non-call exceptions, we'll have done this during epilogue emission.  */
22731 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
22732 && !flag_non_call_exceptions
22733 && !can_throw_internal (insn))
22740 /* If we didn't find a real insn following the call, prevent the
22741 unwinder from looking into the next function. */
22747 xasm = "call\t%P0";
22749 xasm = "call\t%A0";
22751 output_asm_insn (xasm, &call_op);
22759 /* Clear stack slot assignments remembered from previous functions.
22760    This is called from INIT_EXPANDERS once before RTL is emitted for each function.  */
22763 static struct machine_function *
22764 ix86_init_machine_status (void)
22766 struct machine_function *f;
22768 f = ggc_alloc_cleared_machine_function ();
22769 f->use_fast_prologue_epilogue_nregs = -1;
22770 f->tls_descriptor_call_expanded_p = 0;
22771 f->call_abi = ix86_abi;
22776 /* Return a MEM corresponding to a stack slot with mode MODE.
22777 Allocate a new slot if necessary.
22779 The RTL for a function can have several slots available: N is
22780 which slot to use. */
22783 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
22785 struct stack_local_entry *s;
22787 gcc_assert (n < MAX_386_STACK_LOCALS);
22789 /* Virtual slot is valid only before vregs are instantiated. */
22790 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
22792 for (s = ix86_stack_locals; s; s = s->next)
22793 if (s->mode == mode && s->n == n)
22794 return validize_mem (copy_rtx (s->rtl));
22796 s = ggc_alloc_stack_local_entry ();
22799 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
22801 s->next = ix86_stack_locals;
22802 ix86_stack_locals = s;
22803 return validize_mem (s->rtl);
22806 /* Calculate the length of the memory address in the instruction encoding.
22807 Includes addr32 prefix, does not include the one-byte modrm, opcode,
22808 or other prefixes. */
22811 memory_address_length (rtx addr)
22813 struct ix86_address parts;
22814 rtx base, index, disp;
22818 if (GET_CODE (addr) == PRE_DEC
22819 || GET_CODE (addr) == POST_INC
22820 || GET_CODE (addr) == PRE_MODIFY
22821 || GET_CODE (addr) == POST_MODIFY)
22824 ok = ix86_decompose_address (addr, &parts);
22827 if (parts.base && GET_CODE (parts.base) == SUBREG)
22828 parts.base = SUBREG_REG (parts.base);
22829 if (parts.index && GET_CODE (parts.index) == SUBREG)
22830 parts.index = SUBREG_REG (parts.index);
22833 index = parts.index;
22836 /* Add length of addr32 prefix. */
22837 len = (GET_CODE (addr) == ZERO_EXTEND
22838 || GET_CODE (addr) == AND);
22841 - esp as the base always wants an index,
22842 - ebp as the base always wants a displacement,
22843 - r12 as the base always wants an index,
22844 - r13 as the base always wants a displacement. */
22846 /* Register Indirect. */
22847 if (base && !index && !disp)
22849       /* esp (for its index) and ebp (for its displacement) need
22850 	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit mode.  */
22853 && (addr == arg_pointer_rtx
22854 || addr == frame_pointer_rtx
22855 || REGNO (addr) == SP_REG
22856 || REGNO (addr) == BP_REG
22857 || REGNO (addr) == R12_REG
22858 || REGNO (addr) == R13_REG))
22862 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
22863 is not disp32, but disp32(%rip), so for disp32
22864 SIB byte is needed, unless print_operand_address
22865      optimizes it into disp32(%rip) or (%rip) is implied by UNSPEC.  */
22867 else if (disp && !base && !index)
22874 if (GET_CODE (disp) == CONST)
22875 symbol = XEXP (disp, 0);
22876 if (GET_CODE (symbol) == PLUS
22877 && CONST_INT_P (XEXP (symbol, 1)))
22878 symbol = XEXP (symbol, 0);
22880 if (GET_CODE (symbol) != LABEL_REF
22881 && (GET_CODE (symbol) != SYMBOL_REF
22882 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
22883 && (GET_CODE (symbol) != UNSPEC
22884 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
22885 && XINT (symbol, 1) != UNSPEC_PCREL
22886 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
22893 /* Find the length of the displacement constant. */
22896 if (base && satisfies_constraint_K (disp))
22901 /* ebp always wants a displacement. Similarly r13. */
22902 else if (base && REG_P (base)
22903 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
22906 /* An index requires the two-byte modrm form.... */
22908 /* ...like esp (or r12), which always wants an index. */
22909 || base == arg_pointer_rtx
22910 || base == frame_pointer_rtx
22911 || (base && REG_P (base)
22912 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
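  /* A compact sketch of the displacement-length rules applied here,
     with illustrative helper names:

	 if (disp == 0 && !base_forces_disp (base))
	   ;                              no displacement byte
	 else if (base != 0 && fits_signed_8bit (disp))
	   len += 1;                      disp8 form
	 else
	   len += 4;                      disp32 form

     fits_signed_8bit corresponds to the 'K' constraint test above, and
     base_forces_disp is true for ebp and r13, which have no
     displacement-free encoding.  */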
22929 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
22930    is set, expect that the insn has an 8-bit immediate alternative.  */
22932 ix86_attr_length_immediate_default (rtx insn, bool shortform)
22936 extract_insn_cached (insn);
22937 for (i = recog_data.n_operands - 1; i >= 0; --i)
22938 if (CONSTANT_P (recog_data.operand[i]))
22940 enum attr_mode mode = get_attr_mode (insn);
22943 if (shortform && CONST_INT_P (recog_data.operand[i]))
22945 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22952 ival = trunc_int_for_mode (ival, HImode);
22955 ival = trunc_int_for_mode (ival, SImode);
22960 if (IN_RANGE (ival, -128, 127))
22977       /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
22982 fatal_insn ("unknown insn mode", insn);
22987 /* Compute default value for "length_address" attribute. */
22989 ix86_attr_length_address_default (rtx insn)
22993 if (get_attr_type (insn) == TYPE_LEA)
22995 rtx set = PATTERN (insn), addr;
22997 if (GET_CODE (set) == PARALLEL)
22998 set = XVECEXP (set, 0, 0);
23000 gcc_assert (GET_CODE (set) == SET);
23002 addr = SET_SRC (set);
23003 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
23005 if (GET_CODE (addr) == ZERO_EXTEND)
23006 addr = XEXP (addr, 0);
23007 if (GET_CODE (addr) == SUBREG)
23008 addr = SUBREG_REG (addr);
23011 return memory_address_length (addr);
23014 extract_insn_cached (insn);
23015 for (i = recog_data.n_operands - 1; i >= 0; --i)
23016 if (MEM_P (recog_data.operand[i]))
23018 constrain_operands_cached (reload_completed);
23019 if (which_alternative != -1)
23021 const char *constraints = recog_data.constraints[i];
23022 int alt = which_alternative;
23024 while (*constraints == '=' || *constraints == '+')
23027 while (*constraints++ != ',')
23029 /* Skip ignored operands. */
23030 if (*constraints == 'X')
23033 return memory_address_length (XEXP (recog_data.operand[i], 0));
23038 /* Compute default value for "length_vex" attribute. It includes
23039 2 or 3 byte VEX prefix and 1 opcode byte. */
23042 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
23046   /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W
23047      bit requires the 3-byte VEX prefix.  */
23048 if (!has_0f_opcode || has_vex_w)
23051   /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
23055 extract_insn_cached (insn);
23057 for (i = recog_data.n_operands - 1; i >= 0; --i)
23058 if (REG_P (recog_data.operand[i]))
23060 	/* The REX.W bit requires the 3-byte VEX prefix.  */
23061 if (GET_MODE (recog_data.operand[i]) == DImode
23062 && GENERAL_REG_P (recog_data.operand[i]))
23067 	/* The REX.X or REX.B bits require the 3-byte VEX prefix.  */
23068 if (MEM_P (recog_data.operand[i])
23069 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
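  /* Summarizing the checks above: the 2-byte VEX prefix can encode only
     the implied 0f opcode map and the inverted REX.R bit, so an insn
     needing VEX.W, REX.X, REX.B, or another opcode map must use the
     3-byte form.  A sketch of the resulting length, counting the one
     opcode byte:

	 len = 1 + (needs_w || needs_x_or_b || !map_0f ? 3 : 2);

     where the three conditions stand for the tests performed above.  */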
23076 /* Return the maximum number of instructions a CPU can issue.  */
23079 ix86_issue_rate (void)
23083 case PROCESSOR_PENTIUM:
23084 case PROCESSOR_ATOM:
23088 case PROCESSOR_PENTIUMPRO:
23089 case PROCESSOR_PENTIUM4:
23090 case PROCESSOR_CORE2_32:
23091 case PROCESSOR_CORE2_64:
23092 case PROCESSOR_COREI7_32:
23093 case PROCESSOR_COREI7_64:
23094 case PROCESSOR_ATHLON:
23096 case PROCESSOR_AMDFAM10:
23097 case PROCESSOR_NOCONA:
23098 case PROCESSOR_GENERIC32:
23099 case PROCESSOR_GENERIC64:
23100 case PROCESSOR_BDVER1:
23101 case PROCESSOR_BDVER2:
23102 case PROCESSOR_BTVER1:
23110 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
23111    set by DEP_INSN and nothing else that DEP_INSN sets.  */
23114 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
23118 /* Simplify the test for uninteresting insns. */
23119 if (insn_type != TYPE_SETCC
23120 && insn_type != TYPE_ICMOV
23121 && insn_type != TYPE_FCMOV
23122 && insn_type != TYPE_IBR)
23125 if ((set = single_set (dep_insn)) != 0)
23127 set = SET_DEST (set);
23130 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
23131 && XVECLEN (PATTERN (dep_insn), 0) == 2
23132 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
23133 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
23135 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
23136       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
23141 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
23144 /* This test is true if the dependent insn reads the flags but
23145 not any other potentially set register. */
23146 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
23149 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
23155 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN.  */
23159 ix86_agi_dependent (rtx set_insn, rtx use_insn)
23162 extract_insn_cached (use_insn);
23163 for (i = recog_data.n_operands - 1; i >= 0; --i)
23164 if (MEM_P (recog_data.operand[i]))
23166 rtx addr = XEXP (recog_data.operand[i], 0);
23167 return modified_in_p (addr, set_insn) != 0;
23173 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
23175 enum attr_type insn_type, dep_insn_type;
23176 enum attr_memory memory;
23178 int dep_insn_code_number;
23180 /* Anti and output dependencies have zero cost on all CPUs. */
23181 if (REG_NOTE_KIND (link) != 0)
23184 dep_insn_code_number = recog_memoized (dep_insn);
23186 /* If we can't recognize the insns, we can't really do anything. */
23187 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
23190 insn_type = get_attr_type (insn);
23191 dep_insn_type = get_attr_type (dep_insn);
23195 case PROCESSOR_PENTIUM:
23196 /* Address Generation Interlock adds a cycle of latency. */
23197 if (insn_type == TYPE_LEA)
23199 rtx addr = PATTERN (insn);
23201 if (GET_CODE (addr) == PARALLEL)
23202 addr = XVECEXP (addr, 0, 0);
23204 gcc_assert (GET_CODE (addr) == SET);
23206 addr = SET_SRC (addr);
23207 if (modified_in_p (addr, dep_insn))
23210 else if (ix86_agi_dependent (dep_insn, insn))
23213 /* ??? Compares pair with jump/setcc. */
23214 if (ix86_flags_dependent (insn, dep_insn, insn_type))
23217       /* Floating point stores require the value to be ready one cycle earlier.  */
23218 if (insn_type == TYPE_FMOV
23219 && get_attr_memory (insn) == MEMORY_STORE
23220 && !ix86_agi_dependent (dep_insn, insn))
23224 case PROCESSOR_PENTIUMPRO:
23225 memory = get_attr_memory (insn);
23227 /* INT->FP conversion is expensive. */
23228 if (get_attr_fp_int_src (dep_insn))
23231 /* There is one cycle extra latency between an FP op and a store. */
23232 if (insn_type == TYPE_FMOV
23233 && (set = single_set (dep_insn)) != NULL_RTX
23234 && (set2 = single_set (insn)) != NULL_RTX
23235 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
23236 && MEM_P (SET_DEST (set2)))
23239       /* Show the ability of the reorder buffer to hide the latency of a load
23240 	 by executing it in parallel with the previous instruction when the
23241 	 previous instruction is not needed to compute the address.  */
23242 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23243 && !ix86_agi_dependent (dep_insn, insn))
23245 	/* Claim moves to take one cycle, as the core can issue one load
23246 	   at a time and the next load can start a cycle later.  */
23247 if (dep_insn_type == TYPE_IMOV
23248 || dep_insn_type == TYPE_FMOV)
23256 memory = get_attr_memory (insn);
23258   /* The esp dependency is resolved before the instruction is really finished.  */
23260 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
23261 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
23264 /* INT->FP conversion is expensive. */
23265 if (get_attr_fp_int_src (dep_insn))
23268   /* Show the ability of the reorder buffer to hide the latency of a load
23269      by executing it in parallel with the previous instruction when the
23270      previous instruction is not needed to compute the address.  */
23271 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23272 && !ix86_agi_dependent (dep_insn, insn))
23274 	/* Claim moves to take one cycle, as the core can issue one load
23275 	   at a time and the next load can start a cycle later.  */
23276 if (dep_insn_type == TYPE_IMOV
23277 || dep_insn_type == TYPE_FMOV)
23286 case PROCESSOR_ATHLON:
23288 case PROCESSOR_AMDFAM10:
23289 case PROCESSOR_BDVER1:
23290 case PROCESSOR_BDVER2:
23291 case PROCESSOR_BTVER1:
23292 case PROCESSOR_ATOM:
23293 case PROCESSOR_GENERIC32:
23294 case PROCESSOR_GENERIC64:
23295 memory = get_attr_memory (insn);
23297     /* Show the ability of the reorder buffer to hide the latency of a load
23298        by executing it in parallel with the previous instruction when the
23299        previous instruction is not needed to compute the address.  */
23300 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23301 && !ix86_agi_dependent (dep_insn, insn))
23303 enum attr_unit unit = get_attr_unit (insn);
23306 	  /* Because of the difference between the length of integer and
23307 	     floating unit pipeline preparation stages, the memory operands
23308 	     for floating point are cheaper.
23310 	     ??? For Athlon the difference is most probably 2.  */
23311 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
23314 loadcost = TARGET_ATHLON ? 2 : 0;
23316 if (cost >= loadcost)
23329 /* How many alternative schedules to try.  This should be as wide as the
23330    scheduling freedom in the DFA, but no wider.  Making this value too
23331    large results in extra work for the scheduler.  */
23334 ia32_multipass_dfa_lookahead (void)
23338 case PROCESSOR_PENTIUM:
23341 case PROCESSOR_PENTIUMPRO:
23345 case PROCESSOR_CORE2_32:
23346 case PROCESSOR_CORE2_64:
23347 case PROCESSOR_COREI7_32:
23348 case PROCESSOR_COREI7_64:
23349       /* Generally, we want haifa-sched:max_issue() to look ahead as far
23350 	 as the number of instructions that can be executed in a cycle, i.e.,
23351 	 issue_rate.  I wonder why tuning for many CPUs does not do this.  */
23352 return ix86_issue_rate ();
23361 /* Model the decoder of Core 2/i7.
23362    The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
23363    track the instruction fetch block boundaries and make sure that long
23364    (9+ byte) instructions are assigned to D0.  */
23366 /* Maximum length of an insn that can be handled by
23367 a secondary decoder unit. '8' for Core 2/i7. */
23368 static int core2i7_secondary_decoder_max_insn_size;
23370 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
23371 '16' for Core 2/i7. */
23372 static int core2i7_ifetch_block_size;
23374 /* Maximum number of instructions decoder can handle per cycle.
23375 '6' for Core 2/i7. */
23376 static int core2i7_ifetch_block_max_insns;
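/* A worked example with the Core 2/i7 values set in
   ix86_sched_init_global (16-byte fetch block, at most 6 insns per
   block, 8-byte secondary decoder limit): a 9-byte insn can be taken
   only as the first insn of a cycle, and once a block has accepted
   14 bytes a 3-byte insn no longer fits, since 14 + 3 > 16.  */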
23378 typedef struct ix86_first_cycle_multipass_data_ *
23379 ix86_first_cycle_multipass_data_t;
23380 typedef const struct ix86_first_cycle_multipass_data_ *
23381 const_ix86_first_cycle_multipass_data_t;
23383 /* A variable to store target state across calls to max_issue within one cycle.  */
23385 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
23386 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
23388 /* Initialize DATA. */
23390 core2i7_first_cycle_multipass_init (void *_data)
23392 ix86_first_cycle_multipass_data_t data
23393 = (ix86_first_cycle_multipass_data_t) _data;
23395 data->ifetch_block_len = 0;
23396 data->ifetch_block_n_insns = 0;
23397 data->ready_try_change = NULL;
23398 data->ready_try_change_size = 0;
23401 /* Advancing the cycle; reset ifetch block counts. */
23403 core2i7_dfa_post_advance_cycle (void)
23405 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
23407 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
23409 data->ifetch_block_len = 0;
23410 data->ifetch_block_n_insns = 0;
23413 static int min_insn_size (rtx);
23415 /* Filter out insns from ready_try that the core will not be able to issue
23416    on the current cycle due to decoder restrictions.  */
23418 core2i7_first_cycle_multipass_filter_ready_try
23419 (const_ix86_first_cycle_multipass_data_t data,
23420 char *ready_try, int n_ready, bool first_cycle_insn_p)
23427 if (ready_try[n_ready])
23430 insn = get_ready_element (n_ready);
23431 insn_size = min_insn_size (insn);
23433     if (/* If this is too long an insn for a secondary decoder ...  */
23434 (!first_cycle_insn_p
23435 && insn_size > core2i7_secondary_decoder_max_insn_size)
23436 /* ... or it would not fit into the ifetch block ... */
23437 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
23438 /* ... or the decoder is full already ... */
23439 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
23440 /* ... mask the insn out. */
23442 ready_try[n_ready] = 1;
23444 if (data->ready_try_change)
23445 SET_BIT (data->ready_try_change, n_ready);
23450 /* Prepare for a new round of multipass lookahead scheduling. */
23452 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
23453 bool first_cycle_insn_p)
23455 ix86_first_cycle_multipass_data_t data
23456 = (ix86_first_cycle_multipass_data_t) _data;
23457 const_ix86_first_cycle_multipass_data_t prev_data
23458 = ix86_first_cycle_multipass_data;
23460 /* Restore the state from the end of the previous round. */
23461 data->ifetch_block_len = prev_data->ifetch_block_len;
23462 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
23464   /* Filter instructions that cannot be issued on the current cycle due to
23465 decoder restrictions. */
23466 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
23467 first_cycle_insn_p);
23470 /* INSN is being issued in current solution. Account for its impact on
23471 the decoder model. */
23473 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
23474 rtx insn, const void *_prev_data)
23476 ix86_first_cycle_multipass_data_t data
23477 = (ix86_first_cycle_multipass_data_t) _data;
23478 const_ix86_first_cycle_multipass_data_t prev_data
23479 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
23481 int insn_size = min_insn_size (insn);
23483 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
23484 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
23485 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
23486 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
23488 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
23489 if (!data->ready_try_change)
23491 data->ready_try_change = sbitmap_alloc (n_ready);
23492 data->ready_try_change_size = n_ready;
23494 else if (data->ready_try_change_size < n_ready)
23496 data->ready_try_change = sbitmap_resize (data->ready_try_change,
23498 data->ready_try_change_size = n_ready;
23500 sbitmap_zero (data->ready_try_change);
23502 /* Filter out insns from ready_try that the core will not be able to issue
23503      on the current cycle due to decoder restrictions.  */
23504 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
23508 /* Revert the effect on ready_try. */
23510 core2i7_first_cycle_multipass_backtrack (const void *_data,
23512 int n_ready ATTRIBUTE_UNUSED)
23514 const_ix86_first_cycle_multipass_data_t data
23515 = (const_ix86_first_cycle_multipass_data_t) _data;
23516 unsigned int i = 0;
23517 sbitmap_iterator sbi;
23519 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
23520 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
23526 /* Save the result of multipass lookahead scheduling for the next round. */
23528 core2i7_first_cycle_multipass_end (const void *_data)
23530 const_ix86_first_cycle_multipass_data_t data
23531 = (const_ix86_first_cycle_multipass_data_t) _data;
23532 ix86_first_cycle_multipass_data_t next_data
23533 = ix86_first_cycle_multipass_data;
23537 next_data->ifetch_block_len = data->ifetch_block_len;
23538 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
23542 /* Deallocate target data. */
23544 core2i7_first_cycle_multipass_fini (void *_data)
23546 ix86_first_cycle_multipass_data_t data
23547 = (ix86_first_cycle_multipass_data_t) _data;
23549 if (data->ready_try_change)
23551 sbitmap_free (data->ready_try_change);
23552 data->ready_try_change = NULL;
23553 data->ready_try_change_size = 0;
23557 /* Prepare for scheduling pass. */
23559 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
23560 int verbose ATTRIBUTE_UNUSED,
23561 int max_uid ATTRIBUTE_UNUSED)
23563 /* Install scheduling hooks for current CPU. Some of these hooks are used
23564 in time-critical parts of the scheduler, so we only set them up when
23565 they are actually used. */
23568 case PROCESSOR_CORE2_32:
23569 case PROCESSOR_CORE2_64:
23570 case PROCESSOR_COREI7_32:
23571 case PROCESSOR_COREI7_64:
23572 targetm.sched.dfa_post_advance_cycle
23573 = core2i7_dfa_post_advance_cycle;
23574 targetm.sched.first_cycle_multipass_init
23575 = core2i7_first_cycle_multipass_init;
23576 targetm.sched.first_cycle_multipass_begin
23577 = core2i7_first_cycle_multipass_begin;
23578 targetm.sched.first_cycle_multipass_issue
23579 = core2i7_first_cycle_multipass_issue;
23580 targetm.sched.first_cycle_multipass_backtrack
23581 = core2i7_first_cycle_multipass_backtrack;
23582 targetm.sched.first_cycle_multipass_end
23583 = core2i7_first_cycle_multipass_end;
23584 targetm.sched.first_cycle_multipass_fini
23585 = core2i7_first_cycle_multipass_fini;
23587 /* Set decoder parameters. */
23588 core2i7_secondary_decoder_max_insn_size = 8;
23589 core2i7_ifetch_block_size = 16;
23590 core2i7_ifetch_block_max_insns = 6;
23594 targetm.sched.dfa_post_advance_cycle = NULL;
23595 targetm.sched.first_cycle_multipass_init = NULL;
23596 targetm.sched.first_cycle_multipass_begin = NULL;
23597 targetm.sched.first_cycle_multipass_issue = NULL;
23598 targetm.sched.first_cycle_multipass_backtrack = NULL;
23599 targetm.sched.first_cycle_multipass_end = NULL;
23600 targetm.sched.first_cycle_multipass_fini = NULL;
23606 /* Compute the alignment given to a constant that is being placed in memory.
23607    EXP is the constant and ALIGN is the alignment that the object would ordinarily have.
23609    The value of this function is used instead of that alignment to align the object.  */
23613 ix86_constant_alignment (tree exp, int align)
23615 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
23616 || TREE_CODE (exp) == INTEGER_CST)
23618 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
23620 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
23623 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
23624 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
23625 return BITS_PER_WORD;
23630 /* Compute the alignment for a static variable.
23631 TYPE is the data type, and ALIGN is the alignment that
23632 the object would ordinarily have. The value of this function is used
23633 instead of that alignment to align the object. */
23636 ix86_data_alignment (tree type, int align)
23638 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
23640 if (AGGREGATE_TYPE_P (type)
23641 && TYPE_SIZE (type)
23642 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23643 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
23644 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
23645 && align < max_align)
23648   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
23649      to a 16-byte boundary.  */
23652 if (AGGREGATE_TYPE_P (type)
23653 && TYPE_SIZE (type)
23654 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23655 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
23656 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23660 if (TREE_CODE (type) == ARRAY_TYPE)
23662 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23664 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23667 else if (TREE_CODE (type) == COMPLEX_TYPE)
23670 if (TYPE_MODE (type) == DCmode && align < 64)
23672 if ((TYPE_MODE (type) == XCmode
23673 || TYPE_MODE (type) == TCmode) && align < 128)
23676 else if ((TREE_CODE (type) == RECORD_TYPE
23677 || TREE_CODE (type) == UNION_TYPE
23678 || TREE_CODE (type) == QUAL_UNION_TYPE)
23679 && TYPE_FIELDS (type))
23681 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23683 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23686 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23687 || TREE_CODE (type) == INTEGER_TYPE)
23689 if (TYPE_MODE (type) == DFmode && align < 64)
23691 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
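  /* For example, under the mode-based rules above a DFmode scalar or an
     array of doubles is raised to at least 64-bit alignment, and XCmode
     or TCmode complex values to 128-bit.  */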
23698 /* Compute the alignment for a local variable or a stack slot. EXP is
23699 the data type or decl itself, MODE is the widest mode available and
23700 ALIGN is the alignment that the object would ordinarily have. The
23701    value of this macro is used instead of that alignment to align the object.  */
23705 ix86_local_alignment (tree exp, enum machine_mode mode,
23706 unsigned int align)
23710 if (exp && DECL_P (exp))
23712 type = TREE_TYPE (exp);
23721 /* Don't do dynamic stack realignment for long long objects with
23722 -mpreferred-stack-boundary=2. */
23725 && ix86_preferred_stack_boundary < 64
23726 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
23727 && (!type || !TYPE_USER_ALIGN (type))
23728 && (!decl || !DECL_USER_ALIGN (decl)))
23731   /* If TYPE is NULL, we are allocating a stack slot for a caller-save
23732      register in MODE.  We will return the largest alignment of XF and DF.  */
23736 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
23737 align = GET_MODE_ALIGNMENT (DFmode);
23741   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
23742      to a 16-byte boundary.  The exact wording is:
23744 An array uses the same alignment as its elements, except that a local or
23745 global array variable of length at least 16 bytes or
23746 a C99 variable-length array variable always has alignment of at least 16 bytes.
23748    This was added to allow use of aligned SSE instructions on arrays.  This
23749    rule is meant for static storage (where the compiler cannot do the analysis
23750    by itself).  We follow it for automatic variables only when convenient:
23751    we fully control everything in the function being compiled, and functions
23752    from other units cannot rely on the alignment.
23754    Exclude the va_list type.  It is the common case of a local array where
23755    we cannot benefit from the alignment.  */
23756 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
23759 if (AGGREGATE_TYPE_P (type)
23760 && (va_list_type_node == NULL_TREE
23761 || (TYPE_MAIN_VARIANT (type)
23762 != TYPE_MAIN_VARIANT (va_list_type_node)))
23763 && TYPE_SIZE (type)
23764 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23765 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
23766 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23769 if (TREE_CODE (type) == ARRAY_TYPE)
23771 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23773 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23776 else if (TREE_CODE (type) == COMPLEX_TYPE)
23778 if (TYPE_MODE (type) == DCmode && align < 64)
23780 if ((TYPE_MODE (type) == XCmode
23781 || TYPE_MODE (type) == TCmode) && align < 128)
23784 else if ((TREE_CODE (type) == RECORD_TYPE
23785 || TREE_CODE (type) == UNION_TYPE
23786 || TREE_CODE (type) == QUAL_UNION_TYPE)
23787 && TYPE_FIELDS (type))
23789 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23791 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23794 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23795 || TREE_CODE (type) == INTEGER_TYPE)
23798 if (TYPE_MODE (type) == DFmode && align < 64)
23800 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23806 /* Compute the minimum required alignment for dynamic stack realignment
23807 purposes for a local variable, parameter or a stack slot. EXP is
23808 the data type or decl itself, MODE is its mode and ALIGN is the
23809 alignment that the object would ordinarily have. */
23812 ix86_minimum_alignment (tree exp, enum machine_mode mode,
23813 unsigned int align)
23817 if (exp && DECL_P (exp))
23819 type = TREE_TYPE (exp);
23828 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
23831 /* Don't do dynamic stack realignment for long long objects with
23832 -mpreferred-stack-boundary=2. */
23833 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
23834 && (!type || !TYPE_USER_ALIGN (type))
23835 && (!decl || !DECL_USER_ALIGN (decl)))
23841 /* Find a location for the static chain incoming to a nested function.
23842 This is a register, unless all free registers are used by arguments. */
23845 ix86_static_chain (const_tree fndecl, bool incoming_p)
23849 if (!DECL_STATIC_CHAIN (fndecl))
23854 /* We always use R10 in 64-bit mode. */
23862 /* By default in 32-bit mode we use ECX to pass the static chain. */
23865 fntype = TREE_TYPE (fndecl);
23866 ccvt = ix86_get_callcvt (fntype);
23867 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
23869 /* Fastcall functions use ecx/edx for arguments, which leaves
23870 us with EAX for the static chain.
23871 Thiscall functions use ecx for arguments, which also
23872 leaves us with EAX for the static chain. */
23875 else if (ix86_function_regparm (fntype, fndecl) == 3)
23877 /* For regparm 3, we have no free call-clobbered registers in
23878 which to store the static chain. In order to implement this,
23879 we have the trampoline push the static chain to the stack.
23880 However, we can't push a value below the return address when
23881 we call the nested function directly, so we have to use an
23882 alternate entry point. For this we use ESI, and have the
23883 alternate entry point push ESI, so that things appear the
23884 same once we're executing the nested function. */
23887 if (fndecl == current_function_decl)
23888 ix86_static_chain_on_stack = true;
23889 return gen_frame_mem (SImode,
23890 plus_constant (arg_pointer_rtx, -8));
23896 return gen_rtx_REG (Pmode, regno);
23899 /* Emit RTL insns to initialize the variable parts of a trampoline.
23900 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23901 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23902 to be passed to the target function. */
23905 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
23911 fnaddr = XEXP (DECL_RTL (fndecl), 0);
23917       /* Load the function address to r11.  Try to load the address
23918 	 using the shorter movl instead of movabs.  We may want to support
23919 	 movq for kernel mode, but the kernel does not use trampolines at the moment.  */
23921 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
23923 fnaddr = copy_to_mode_reg (DImode, fnaddr);
23925 mem = adjust_address (m_tramp, HImode, offset);
23926 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23928 mem = adjust_address (m_tramp, SImode, offset + 2);
23929 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
23934 mem = adjust_address (m_tramp, HImode, offset);
23935 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23937 mem = adjust_address (m_tramp, DImode, offset + 2);
23938 emit_move_insn (mem, fnaddr);
23942 /* Load static chain using movabs to r10. Use the
23943 shorter movl instead of movabs for x32. */
23955 mem = adjust_address (m_tramp, HImode, offset);
23956 emit_move_insn (mem, gen_int_mode (opcode, HImode));
23958 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
23959 emit_move_insn (mem, chain_value);
23962 /* Jump to r11; the last (unused) byte is a nop, only there to
23963 pad the write out to a single 32-bit store. */
23964 mem = adjust_address (m_tramp, SImode, offset);
23965 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
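      /* Decoded, the 64-bit trampoline written above is, in memory order:

	     49 bb <imm64>    movabs $fnaddr, %r11   (41 bb <imm32> movl form)
	     49 ba <imm64>    movabs $chain, %r10
	     49 ff e3         jmp *%r11
	     90               nop

	 the little-endian HImode/SImode stores emit the REX byte first,
	 which is why the opcodes appear byte-swapped in the constants.  */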
23972 /* Depending on the static chain location, either load a register
23973 with a constant, or push the constant to the stack. All of the
23974 instructions are the same size. */
23975 chain = ix86_static_chain (fndecl, true);
23978 switch (REGNO (chain))
23981 opcode = 0xb8; break;
23983 opcode = 0xb9; break;
23985 gcc_unreachable ();
23991 mem = adjust_address (m_tramp, QImode, offset);
23992 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23994 mem = adjust_address (m_tramp, SImode, offset + 1);
23995 emit_move_insn (mem, chain_value);
23998 mem = adjust_address (m_tramp, QImode, offset);
23999 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
24001 mem = adjust_address (m_tramp, SImode, offset + 1);
24003 /* Compute offset from the end of the jmp to the target function.
24004 In the case in which the trampoline stores the static chain on
24005 the stack, we need to skip the first insn which pushes the
24006 (call-saved) register static chain; this push is 1 byte. */
24008 disp = expand_binop (SImode, sub_optab, fnaddr,
24009 plus_constant (XEXP (m_tramp, 0),
24010 offset - (MEM_P (chain) ? 1 : 0)),
24011 NULL_RTX, 1, OPTAB_DIRECT);
24012 emit_move_insn (mem, disp);
24015 gcc_assert (offset <= TRAMPOLINE_SIZE);
24017 #ifdef HAVE_ENABLE_EXECUTE_STACK
24018 #ifdef CHECK_EXECUTE_STACK_ENABLED
24019 if (CHECK_EXECUTE_STACK_ENABLED)
24021 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
24022 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
24026 /* The following file contains several enumerations and data structures
24027 built from the definitions in i386-builtin-types.def. */
24029 #include "i386-builtin-types.inc"
24031 /* Table for the ix86 builtin non-function types. */
24032 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
24034 /* Retrieve an element from the above table, building some of
24035 the types lazily. */
24038 ix86_get_builtin_type (enum ix86_builtin_type tcode)
24040 unsigned int index;
24043 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
24045 type = ix86_builtin_type_tab[(int) tcode];
24049 gcc_assert (tcode > IX86_BT_LAST_PRIM);
24050 if (tcode <= IX86_BT_LAST_VECT)
24052 enum machine_mode mode;
24054 index = tcode - IX86_BT_LAST_PRIM - 1;
24055 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
24056 mode = ix86_builtin_type_vect_mode[index];
24058 type = build_vector_type_for_mode (itype, mode);
24064 index = tcode - IX86_BT_LAST_VECT - 1;
24065 if (tcode <= IX86_BT_LAST_PTR)
24066 quals = TYPE_UNQUALIFIED;
24068 quals = TYPE_QUAL_CONST;
24070 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
24071 if (quals != TYPE_UNQUALIFIED)
24072 itype = build_qualified_type (itype, quals);
24074 type = build_pointer_type (itype);
24077 ix86_builtin_type_tab[(int) tcode] = type;
24081 /* Table for the ix86 builtin function types. */
24082 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
24084 /* Retrieve an element from the above table, building some of
24085 the types lazily. */
24088 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
24092 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
24094 type = ix86_builtin_func_type_tab[(int) tcode];
24098 if (tcode <= IX86_BT_LAST_FUNC)
24100 unsigned start = ix86_builtin_func_start[(int) tcode];
24101 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
24102 tree rtype, atype, args = void_list_node;
24105 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
24106 for (i = after - 1; i > start; --i)
24108 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
24109 args = tree_cons (NULL, atype, args);
24112 type = build_function_type (rtype, args);
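      /* The loop above walks the signature backwards so that consing onto
	 the front of ARGS yields the arguments in declaration order: for a
	 signature stored as { R, A1, A2 } it conses A2 and then A1,
	 producing the list (A1, A2, void) that build_function_type
	 expects.  */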
24116 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
24117 enum ix86_builtin_func_type icode;
24119 icode = ix86_builtin_func_alias_base[index];
24120 type = ix86_get_builtin_func_type (icode);
24123 ix86_builtin_func_type_tab[(int) tcode] = type;
24128 /* Codes for all the SSE/MMX builtins. */
24131 IX86_BUILTIN_ADDPS,
24132 IX86_BUILTIN_ADDSS,
24133 IX86_BUILTIN_DIVPS,
24134 IX86_BUILTIN_DIVSS,
24135 IX86_BUILTIN_MULPS,
24136 IX86_BUILTIN_MULSS,
24137 IX86_BUILTIN_SUBPS,
24138 IX86_BUILTIN_SUBSS,
24140 IX86_BUILTIN_CMPEQPS,
24141 IX86_BUILTIN_CMPLTPS,
24142 IX86_BUILTIN_CMPLEPS,
24143 IX86_BUILTIN_CMPGTPS,
24144 IX86_BUILTIN_CMPGEPS,
24145 IX86_BUILTIN_CMPNEQPS,
24146 IX86_BUILTIN_CMPNLTPS,
24147 IX86_BUILTIN_CMPNLEPS,
24148 IX86_BUILTIN_CMPNGTPS,
24149 IX86_BUILTIN_CMPNGEPS,
24150 IX86_BUILTIN_CMPORDPS,
24151 IX86_BUILTIN_CMPUNORDPS,
24152 IX86_BUILTIN_CMPEQSS,
24153 IX86_BUILTIN_CMPLTSS,
24154 IX86_BUILTIN_CMPLESS,
24155 IX86_BUILTIN_CMPNEQSS,
24156 IX86_BUILTIN_CMPNLTSS,
24157 IX86_BUILTIN_CMPNLESS,
24158 IX86_BUILTIN_CMPNGTSS,
24159 IX86_BUILTIN_CMPNGESS,
24160 IX86_BUILTIN_CMPORDSS,
24161 IX86_BUILTIN_CMPUNORDSS,
24163 IX86_BUILTIN_COMIEQSS,
24164 IX86_BUILTIN_COMILTSS,
24165 IX86_BUILTIN_COMILESS,
24166 IX86_BUILTIN_COMIGTSS,
24167 IX86_BUILTIN_COMIGESS,
24168 IX86_BUILTIN_COMINEQSS,
24169 IX86_BUILTIN_UCOMIEQSS,
24170 IX86_BUILTIN_UCOMILTSS,
24171 IX86_BUILTIN_UCOMILESS,
24172 IX86_BUILTIN_UCOMIGTSS,
24173 IX86_BUILTIN_UCOMIGESS,
24174 IX86_BUILTIN_UCOMINEQSS,
24176 IX86_BUILTIN_CVTPI2PS,
24177 IX86_BUILTIN_CVTPS2PI,
24178 IX86_BUILTIN_CVTSI2SS,
24179 IX86_BUILTIN_CVTSI642SS,
24180 IX86_BUILTIN_CVTSS2SI,
24181 IX86_BUILTIN_CVTSS2SI64,
24182 IX86_BUILTIN_CVTTPS2PI,
24183 IX86_BUILTIN_CVTTSS2SI,
24184 IX86_BUILTIN_CVTTSS2SI64,
24186 IX86_BUILTIN_MAXPS,
24187 IX86_BUILTIN_MAXSS,
24188 IX86_BUILTIN_MINPS,
24189 IX86_BUILTIN_MINSS,
24191 IX86_BUILTIN_LOADUPS,
24192 IX86_BUILTIN_STOREUPS,
24193 IX86_BUILTIN_MOVSS,
24195 IX86_BUILTIN_MOVHLPS,
24196 IX86_BUILTIN_MOVLHPS,
24197 IX86_BUILTIN_LOADHPS,
24198 IX86_BUILTIN_LOADLPS,
24199 IX86_BUILTIN_STOREHPS,
24200 IX86_BUILTIN_STORELPS,
24202 IX86_BUILTIN_MASKMOVQ,
24203 IX86_BUILTIN_MOVMSKPS,
24204 IX86_BUILTIN_PMOVMSKB,
24206 IX86_BUILTIN_MOVNTPS,
24207 IX86_BUILTIN_MOVNTQ,
24209 IX86_BUILTIN_LOADDQU,
24210 IX86_BUILTIN_STOREDQU,
24212 IX86_BUILTIN_PACKSSWB,
24213 IX86_BUILTIN_PACKSSDW,
24214 IX86_BUILTIN_PACKUSWB,
24216 IX86_BUILTIN_PADDB,
24217 IX86_BUILTIN_PADDW,
24218 IX86_BUILTIN_PADDD,
24219 IX86_BUILTIN_PADDQ,
24220 IX86_BUILTIN_PADDSB,
24221 IX86_BUILTIN_PADDSW,
24222 IX86_BUILTIN_PADDUSB,
24223 IX86_BUILTIN_PADDUSW,
24224 IX86_BUILTIN_PSUBB,
24225 IX86_BUILTIN_PSUBW,
24226 IX86_BUILTIN_PSUBD,
24227 IX86_BUILTIN_PSUBQ,
24228 IX86_BUILTIN_PSUBSB,
24229 IX86_BUILTIN_PSUBSW,
24230 IX86_BUILTIN_PSUBUSB,
24231 IX86_BUILTIN_PSUBUSW,
24233 IX86_BUILTIN_PAND,
24234 IX86_BUILTIN_PANDN,
24235 IX86_BUILTIN_POR,
24236 IX86_BUILTIN_PXOR,
24238 IX86_BUILTIN_PAVGB,
24239 IX86_BUILTIN_PAVGW,
24241 IX86_BUILTIN_PCMPEQB,
24242 IX86_BUILTIN_PCMPEQW,
24243 IX86_BUILTIN_PCMPEQD,
24244 IX86_BUILTIN_PCMPGTB,
24245 IX86_BUILTIN_PCMPGTW,
24246 IX86_BUILTIN_PCMPGTD,
24248 IX86_BUILTIN_PMADDWD,
24250 IX86_BUILTIN_PMAXSW,
24251 IX86_BUILTIN_PMAXUB,
24252 IX86_BUILTIN_PMINSW,
24253 IX86_BUILTIN_PMINUB,
24255 IX86_BUILTIN_PMULHUW,
24256 IX86_BUILTIN_PMULHW,
24257 IX86_BUILTIN_PMULLW,
24259 IX86_BUILTIN_PSADBW,
24260 IX86_BUILTIN_PSHUFW,
24262 IX86_BUILTIN_PSLLW,
24263 IX86_BUILTIN_PSLLD,
24264 IX86_BUILTIN_PSLLQ,
24265 IX86_BUILTIN_PSRAW,
24266 IX86_BUILTIN_PSRAD,
24267 IX86_BUILTIN_PSRLW,
24268 IX86_BUILTIN_PSRLD,
24269 IX86_BUILTIN_PSRLQ,
24270 IX86_BUILTIN_PSLLWI,
24271 IX86_BUILTIN_PSLLDI,
24272 IX86_BUILTIN_PSLLQI,
24273 IX86_BUILTIN_PSRAWI,
24274 IX86_BUILTIN_PSRADI,
24275 IX86_BUILTIN_PSRLWI,
24276 IX86_BUILTIN_PSRLDI,
24277 IX86_BUILTIN_PSRLQI,
24279 IX86_BUILTIN_PUNPCKHBW,
24280 IX86_BUILTIN_PUNPCKHWD,
24281 IX86_BUILTIN_PUNPCKHDQ,
24282 IX86_BUILTIN_PUNPCKLBW,
24283 IX86_BUILTIN_PUNPCKLWD,
24284 IX86_BUILTIN_PUNPCKLDQ,
24286 IX86_BUILTIN_SHUFPS,
24288 IX86_BUILTIN_RCPPS,
24289 IX86_BUILTIN_RCPSS,
24290 IX86_BUILTIN_RSQRTPS,
24291 IX86_BUILTIN_RSQRTPS_NR,
24292 IX86_BUILTIN_RSQRTSS,
24293 IX86_BUILTIN_RSQRTF,
24294 IX86_BUILTIN_SQRTPS,
24295 IX86_BUILTIN_SQRTPS_NR,
24296 IX86_BUILTIN_SQRTSS,
24298 IX86_BUILTIN_UNPCKHPS,
24299 IX86_BUILTIN_UNPCKLPS,
24301 IX86_BUILTIN_ANDPS,
24302 IX86_BUILTIN_ANDNPS,
24303 IX86_BUILTIN_ORPS,
24304 IX86_BUILTIN_XORPS,
24306 IX86_BUILTIN_EMMS,
24307 IX86_BUILTIN_LDMXCSR,
24308 IX86_BUILTIN_STMXCSR,
24309 IX86_BUILTIN_SFENCE,
24311 /* 3DNow! Original */
24312 IX86_BUILTIN_FEMMS,
24313 IX86_BUILTIN_PAVGUSB,
24314 IX86_BUILTIN_PF2ID,
24315 IX86_BUILTIN_PFACC,
24316 IX86_BUILTIN_PFADD,
24317 IX86_BUILTIN_PFCMPEQ,
24318 IX86_BUILTIN_PFCMPGE,
24319 IX86_BUILTIN_PFCMPGT,
24320 IX86_BUILTIN_PFMAX,
24321 IX86_BUILTIN_PFMIN,
24322 IX86_BUILTIN_PFMUL,
24323 IX86_BUILTIN_PFRCP,
24324 IX86_BUILTIN_PFRCPIT1,
24325 IX86_BUILTIN_PFRCPIT2,
24326 IX86_BUILTIN_PFRSQIT1,
24327 IX86_BUILTIN_PFRSQRT,
24328 IX86_BUILTIN_PFSUB,
24329 IX86_BUILTIN_PFSUBR,
24330 IX86_BUILTIN_PI2FD,
24331 IX86_BUILTIN_PMULHRW,
24333 /* 3DNow! Athlon Extensions */
24334 IX86_BUILTIN_PF2IW,
24335 IX86_BUILTIN_PFNACC,
24336 IX86_BUILTIN_PFPNACC,
24337 IX86_BUILTIN_PI2FW,
24338 IX86_BUILTIN_PSWAPDSI,
24339 IX86_BUILTIN_PSWAPDSF,
24341 /* SSE2 */
24342 IX86_BUILTIN_ADDPD,
24343 IX86_BUILTIN_ADDSD,
24344 IX86_BUILTIN_DIVPD,
24345 IX86_BUILTIN_DIVSD,
24346 IX86_BUILTIN_MULPD,
24347 IX86_BUILTIN_MULSD,
24348 IX86_BUILTIN_SUBPD,
24349 IX86_BUILTIN_SUBSD,
24351 IX86_BUILTIN_CMPEQPD,
24352 IX86_BUILTIN_CMPLTPD,
24353 IX86_BUILTIN_CMPLEPD,
24354 IX86_BUILTIN_CMPGTPD,
24355 IX86_BUILTIN_CMPGEPD,
24356 IX86_BUILTIN_CMPNEQPD,
24357 IX86_BUILTIN_CMPNLTPD,
24358 IX86_BUILTIN_CMPNLEPD,
24359 IX86_BUILTIN_CMPNGTPD,
24360 IX86_BUILTIN_CMPNGEPD,
24361 IX86_BUILTIN_CMPORDPD,
24362 IX86_BUILTIN_CMPUNORDPD,
24363 IX86_BUILTIN_CMPEQSD,
24364 IX86_BUILTIN_CMPLTSD,
24365 IX86_BUILTIN_CMPLESD,
24366 IX86_BUILTIN_CMPNEQSD,
24367 IX86_BUILTIN_CMPNLTSD,
24368 IX86_BUILTIN_CMPNLESD,
24369 IX86_BUILTIN_CMPORDSD,
24370 IX86_BUILTIN_CMPUNORDSD,
24372 IX86_BUILTIN_COMIEQSD,
24373 IX86_BUILTIN_COMILTSD,
24374 IX86_BUILTIN_COMILESD,
24375 IX86_BUILTIN_COMIGTSD,
24376 IX86_BUILTIN_COMIGESD,
24377 IX86_BUILTIN_COMINEQSD,
24378 IX86_BUILTIN_UCOMIEQSD,
24379 IX86_BUILTIN_UCOMILTSD,
24380 IX86_BUILTIN_UCOMILESD,
24381 IX86_BUILTIN_UCOMIGTSD,
24382 IX86_BUILTIN_UCOMIGESD,
24383 IX86_BUILTIN_UCOMINEQSD,
24385 IX86_BUILTIN_MAXPD,
24386 IX86_BUILTIN_MAXSD,
24387 IX86_BUILTIN_MINPD,
24388 IX86_BUILTIN_MINSD,
24390 IX86_BUILTIN_ANDPD,
24391 IX86_BUILTIN_ANDNPD,
24392 IX86_BUILTIN_ORPD,
24393 IX86_BUILTIN_XORPD,
24395 IX86_BUILTIN_SQRTPD,
24396 IX86_BUILTIN_SQRTSD,
24398 IX86_BUILTIN_UNPCKHPD,
24399 IX86_BUILTIN_UNPCKLPD,
24401 IX86_BUILTIN_SHUFPD,
24403 IX86_BUILTIN_LOADUPD,
24404 IX86_BUILTIN_STOREUPD,
24405 IX86_BUILTIN_MOVSD,
24407 IX86_BUILTIN_LOADHPD,
24408 IX86_BUILTIN_LOADLPD,
24410 IX86_BUILTIN_CVTDQ2PD,
24411 IX86_BUILTIN_CVTDQ2PS,
24413 IX86_BUILTIN_CVTPD2DQ,
24414 IX86_BUILTIN_CVTPD2PI,
24415 IX86_BUILTIN_CVTPD2PS,
24416 IX86_BUILTIN_CVTTPD2DQ,
24417 IX86_BUILTIN_CVTTPD2PI,
24419 IX86_BUILTIN_CVTPI2PD,
24420 IX86_BUILTIN_CVTSI2SD,
24421 IX86_BUILTIN_CVTSI642SD,
24423 IX86_BUILTIN_CVTSD2SI,
24424 IX86_BUILTIN_CVTSD2SI64,
24425 IX86_BUILTIN_CVTSD2SS,
24426 IX86_BUILTIN_CVTSS2SD,
24427 IX86_BUILTIN_CVTTSD2SI,
24428 IX86_BUILTIN_CVTTSD2SI64,
24430 IX86_BUILTIN_CVTPS2DQ,
24431 IX86_BUILTIN_CVTPS2PD,
24432 IX86_BUILTIN_CVTTPS2DQ,
24434 IX86_BUILTIN_MOVNTI,
24435 IX86_BUILTIN_MOVNTPD,
24436 IX86_BUILTIN_MOVNTDQ,
24438 IX86_BUILTIN_MOVQ128,
24441 IX86_BUILTIN_MASKMOVDQU,
24442 IX86_BUILTIN_MOVMSKPD,
24443 IX86_BUILTIN_PMOVMSKB128,
24445 IX86_BUILTIN_PACKSSWB128,
24446 IX86_BUILTIN_PACKSSDW128,
24447 IX86_BUILTIN_PACKUSWB128,
24449 IX86_BUILTIN_PADDB128,
24450 IX86_BUILTIN_PADDW128,
24451 IX86_BUILTIN_PADDD128,
24452 IX86_BUILTIN_PADDQ128,
24453 IX86_BUILTIN_PADDSB128,
24454 IX86_BUILTIN_PADDSW128,
24455 IX86_BUILTIN_PADDUSB128,
24456 IX86_BUILTIN_PADDUSW128,
24457 IX86_BUILTIN_PSUBB128,
24458 IX86_BUILTIN_PSUBW128,
24459 IX86_BUILTIN_PSUBD128,
24460 IX86_BUILTIN_PSUBQ128,
24461 IX86_BUILTIN_PSUBSB128,
24462 IX86_BUILTIN_PSUBSW128,
24463 IX86_BUILTIN_PSUBUSB128,
24464 IX86_BUILTIN_PSUBUSW128,
24466 IX86_BUILTIN_PAND128,
24467 IX86_BUILTIN_PANDN128,
24468 IX86_BUILTIN_POR128,
24469 IX86_BUILTIN_PXOR128,
24471 IX86_BUILTIN_PAVGB128,
24472 IX86_BUILTIN_PAVGW128,
24474 IX86_BUILTIN_PCMPEQB128,
24475 IX86_BUILTIN_PCMPEQW128,
24476 IX86_BUILTIN_PCMPEQD128,
24477 IX86_BUILTIN_PCMPGTB128,
24478 IX86_BUILTIN_PCMPGTW128,
24479 IX86_BUILTIN_PCMPGTD128,
24481 IX86_BUILTIN_PMADDWD128,
24483 IX86_BUILTIN_PMAXSW128,
24484 IX86_BUILTIN_PMAXUB128,
24485 IX86_BUILTIN_PMINSW128,
24486 IX86_BUILTIN_PMINUB128,
24488 IX86_BUILTIN_PMULUDQ,
24489 IX86_BUILTIN_PMULUDQ128,
24490 IX86_BUILTIN_PMULHUW128,
24491 IX86_BUILTIN_PMULHW128,
24492 IX86_BUILTIN_PMULLW128,
24494 IX86_BUILTIN_PSADBW128,
24495 IX86_BUILTIN_PSHUFHW,
24496 IX86_BUILTIN_PSHUFLW,
24497 IX86_BUILTIN_PSHUFD,
24499 IX86_BUILTIN_PSLLDQI128,
24500 IX86_BUILTIN_PSLLWI128,
24501 IX86_BUILTIN_PSLLDI128,
24502 IX86_BUILTIN_PSLLQI128,
24503 IX86_BUILTIN_PSRAWI128,
24504 IX86_BUILTIN_PSRADI128,
24505 IX86_BUILTIN_PSRLDQI128,
24506 IX86_BUILTIN_PSRLWI128,
24507 IX86_BUILTIN_PSRLDI128,
24508 IX86_BUILTIN_PSRLQI128,
24510 IX86_BUILTIN_PSLLDQ128,
24511 IX86_BUILTIN_PSLLW128,
24512 IX86_BUILTIN_PSLLD128,
24513 IX86_BUILTIN_PSLLQ128,
24514 IX86_BUILTIN_PSRAW128,
24515 IX86_BUILTIN_PSRAD128,
24516 IX86_BUILTIN_PSRLW128,
24517 IX86_BUILTIN_PSRLD128,
24518 IX86_BUILTIN_PSRLQ128,
24520 IX86_BUILTIN_PUNPCKHBW128,
24521 IX86_BUILTIN_PUNPCKHWD128,
24522 IX86_BUILTIN_PUNPCKHDQ128,
24523 IX86_BUILTIN_PUNPCKHQDQ128,
24524 IX86_BUILTIN_PUNPCKLBW128,
24525 IX86_BUILTIN_PUNPCKLWD128,
24526 IX86_BUILTIN_PUNPCKLDQ128,
24527 IX86_BUILTIN_PUNPCKLQDQ128,
24529 IX86_BUILTIN_CLFLUSH,
24530 IX86_BUILTIN_MFENCE,
24531 IX86_BUILTIN_LFENCE,
24532 IX86_BUILTIN_PAUSE,
24534 IX86_BUILTIN_BSRSI,
24535 IX86_BUILTIN_BSRDI,
24536 IX86_BUILTIN_RDPMC,
24537 IX86_BUILTIN_RDTSC,
24538 IX86_BUILTIN_RDTSCP,
24539 IX86_BUILTIN_ROLQI,
24540 IX86_BUILTIN_ROLHI,
24541 IX86_BUILTIN_RORQI,
24542 IX86_BUILTIN_RORHI,
24544 /* SSE3 */
24545 IX86_BUILTIN_ADDSUBPS,
24546 IX86_BUILTIN_HADDPS,
24547 IX86_BUILTIN_HSUBPS,
24548 IX86_BUILTIN_MOVSHDUP,
24549 IX86_BUILTIN_MOVSLDUP,
24550 IX86_BUILTIN_ADDSUBPD,
24551 IX86_BUILTIN_HADDPD,
24552 IX86_BUILTIN_HSUBPD,
24553 IX86_BUILTIN_LDDQU,
24555 IX86_BUILTIN_MONITOR,
24556 IX86_BUILTIN_MWAIT,
24558 /* SSSE3 */
24559 IX86_BUILTIN_PHADDW,
24560 IX86_BUILTIN_PHADDD,
24561 IX86_BUILTIN_PHADDSW,
24562 IX86_BUILTIN_PHSUBW,
24563 IX86_BUILTIN_PHSUBD,
24564 IX86_BUILTIN_PHSUBSW,
24565 IX86_BUILTIN_PMADDUBSW,
24566 IX86_BUILTIN_PMULHRSW,
24567 IX86_BUILTIN_PSHUFB,
24568 IX86_BUILTIN_PSIGNB,
24569 IX86_BUILTIN_PSIGNW,
24570 IX86_BUILTIN_PSIGND,
24571 IX86_BUILTIN_PALIGNR,
24572 IX86_BUILTIN_PABSB,
24573 IX86_BUILTIN_PABSW,
24574 IX86_BUILTIN_PABSD,
24576 IX86_BUILTIN_PHADDW128,
24577 IX86_BUILTIN_PHADDD128,
24578 IX86_BUILTIN_PHADDSW128,
24579 IX86_BUILTIN_PHSUBW128,
24580 IX86_BUILTIN_PHSUBD128,
24581 IX86_BUILTIN_PHSUBSW128,
24582 IX86_BUILTIN_PMADDUBSW128,
24583 IX86_BUILTIN_PMULHRSW128,
24584 IX86_BUILTIN_PSHUFB128,
24585 IX86_BUILTIN_PSIGNB128,
24586 IX86_BUILTIN_PSIGNW128,
24587 IX86_BUILTIN_PSIGND128,
24588 IX86_BUILTIN_PALIGNR128,
24589 IX86_BUILTIN_PABSB128,
24590 IX86_BUILTIN_PABSW128,
24591 IX86_BUILTIN_PABSD128,
24593 /* AMDFAM10 - SSE4A New Instructions. */
24594 IX86_BUILTIN_MOVNTSD,
24595 IX86_BUILTIN_MOVNTSS,
24596 IX86_BUILTIN_EXTRQI,
24597 IX86_BUILTIN_EXTRQ,
24598 IX86_BUILTIN_INSERTQI,
24599 IX86_BUILTIN_INSERTQ,
24601 /* SSE4.1 */
24602 IX86_BUILTIN_BLENDPD,
24603 IX86_BUILTIN_BLENDPS,
24604 IX86_BUILTIN_BLENDVPD,
24605 IX86_BUILTIN_BLENDVPS,
24606 IX86_BUILTIN_PBLENDVB128,
24607 IX86_BUILTIN_PBLENDW128,
24609 IX86_BUILTIN_DPPD,
24610 IX86_BUILTIN_DPPS,
24612 IX86_BUILTIN_INSERTPS128,
24614 IX86_BUILTIN_MOVNTDQA,
24615 IX86_BUILTIN_MPSADBW128,
24616 IX86_BUILTIN_PACKUSDW128,
24617 IX86_BUILTIN_PCMPEQQ,
24618 IX86_BUILTIN_PHMINPOSUW128,
24620 IX86_BUILTIN_PMAXSB128,
24621 IX86_BUILTIN_PMAXSD128,
24622 IX86_BUILTIN_PMAXUD128,
24623 IX86_BUILTIN_PMAXUW128,
24625 IX86_BUILTIN_PMINSB128,
24626 IX86_BUILTIN_PMINSD128,
24627 IX86_BUILTIN_PMINUD128,
24628 IX86_BUILTIN_PMINUW128,
24630 IX86_BUILTIN_PMOVSXBW128,
24631 IX86_BUILTIN_PMOVSXBD128,
24632 IX86_BUILTIN_PMOVSXBQ128,
24633 IX86_BUILTIN_PMOVSXWD128,
24634 IX86_BUILTIN_PMOVSXWQ128,
24635 IX86_BUILTIN_PMOVSXDQ128,
24637 IX86_BUILTIN_PMOVZXBW128,
24638 IX86_BUILTIN_PMOVZXBD128,
24639 IX86_BUILTIN_PMOVZXBQ128,
24640 IX86_BUILTIN_PMOVZXWD128,
24641 IX86_BUILTIN_PMOVZXWQ128,
24642 IX86_BUILTIN_PMOVZXDQ128,
24644 IX86_BUILTIN_PMULDQ128,
24645 IX86_BUILTIN_PMULLD128,
24647 IX86_BUILTIN_ROUNDPD,
24648 IX86_BUILTIN_ROUNDPS,
24649 IX86_BUILTIN_ROUNDSD,
24650 IX86_BUILTIN_ROUNDSS,
24652 IX86_BUILTIN_FLOORPD,
24653 IX86_BUILTIN_CEILPD,
24654 IX86_BUILTIN_TRUNCPD,
24655 IX86_BUILTIN_RINTPD,
24656 IX86_BUILTIN_ROUNDPD_AZ,
24657 IX86_BUILTIN_FLOORPS,
24658 IX86_BUILTIN_CEILPS,
24659 IX86_BUILTIN_TRUNCPS,
24660 IX86_BUILTIN_RINTPS,
24661 IX86_BUILTIN_ROUNDPS_AZ,
24663 IX86_BUILTIN_PTESTZ,
24664 IX86_BUILTIN_PTESTC,
24665 IX86_BUILTIN_PTESTNZC,
24667 IX86_BUILTIN_VEC_INIT_V2SI,
24668 IX86_BUILTIN_VEC_INIT_V4HI,
24669 IX86_BUILTIN_VEC_INIT_V8QI,
24670 IX86_BUILTIN_VEC_EXT_V2DF,
24671 IX86_BUILTIN_VEC_EXT_V2DI,
24672 IX86_BUILTIN_VEC_EXT_V4SF,
24673 IX86_BUILTIN_VEC_EXT_V4SI,
24674 IX86_BUILTIN_VEC_EXT_V8HI,
24675 IX86_BUILTIN_VEC_EXT_V2SI,
24676 IX86_BUILTIN_VEC_EXT_V4HI,
24677 IX86_BUILTIN_VEC_EXT_V16QI,
24678 IX86_BUILTIN_VEC_SET_V2DI,
24679 IX86_BUILTIN_VEC_SET_V4SF,
24680 IX86_BUILTIN_VEC_SET_V4SI,
24681 IX86_BUILTIN_VEC_SET_V8HI,
24682 IX86_BUILTIN_VEC_SET_V4HI,
24683 IX86_BUILTIN_VEC_SET_V16QI,
24685 IX86_BUILTIN_VEC_PACK_SFIX,
24687 /* SSE4.2 */
24688 IX86_BUILTIN_CRC32QI,
24689 IX86_BUILTIN_CRC32HI,
24690 IX86_BUILTIN_CRC32SI,
24691 IX86_BUILTIN_CRC32DI,
24693 IX86_BUILTIN_PCMPESTRI128,
24694 IX86_BUILTIN_PCMPESTRM128,
24695 IX86_BUILTIN_PCMPESTRA128,
24696 IX86_BUILTIN_PCMPESTRC128,
24697 IX86_BUILTIN_PCMPESTRO128,
24698 IX86_BUILTIN_PCMPESTRS128,
24699 IX86_BUILTIN_PCMPESTRZ128,
24700 IX86_BUILTIN_PCMPISTRI128,
24701 IX86_BUILTIN_PCMPISTRM128,
24702 IX86_BUILTIN_PCMPISTRA128,
24703 IX86_BUILTIN_PCMPISTRC128,
24704 IX86_BUILTIN_PCMPISTRO128,
24705 IX86_BUILTIN_PCMPISTRS128,
24706 IX86_BUILTIN_PCMPISTRZ128,
24708 IX86_BUILTIN_PCMPGTQ,
24710 /* AES instructions */
24711 IX86_BUILTIN_AESENC128,
24712 IX86_BUILTIN_AESENCLAST128,
24713 IX86_BUILTIN_AESDEC128,
24714 IX86_BUILTIN_AESDECLAST128,
24715 IX86_BUILTIN_AESIMC128,
24716 IX86_BUILTIN_AESKEYGENASSIST128,
24718 /* PCLMUL instruction */
24719 IX86_BUILTIN_PCLMULQDQ128,
24721 /* AVX */
24722 IX86_BUILTIN_ADDPD256,
24723 IX86_BUILTIN_ADDPS256,
24724 IX86_BUILTIN_ADDSUBPD256,
24725 IX86_BUILTIN_ADDSUBPS256,
24726 IX86_BUILTIN_ANDPD256,
24727 IX86_BUILTIN_ANDPS256,
24728 IX86_BUILTIN_ANDNPD256,
24729 IX86_BUILTIN_ANDNPS256,
24730 IX86_BUILTIN_BLENDPD256,
24731 IX86_BUILTIN_BLENDPS256,
24732 IX86_BUILTIN_BLENDVPD256,
24733 IX86_BUILTIN_BLENDVPS256,
24734 IX86_BUILTIN_DIVPD256,
24735 IX86_BUILTIN_DIVPS256,
24736 IX86_BUILTIN_DPPS256,
24737 IX86_BUILTIN_HADDPD256,
24738 IX86_BUILTIN_HADDPS256,
24739 IX86_BUILTIN_HSUBPD256,
24740 IX86_BUILTIN_HSUBPS256,
24741 IX86_BUILTIN_MAXPD256,
24742 IX86_BUILTIN_MAXPS256,
24743 IX86_BUILTIN_MINPD256,
24744 IX86_BUILTIN_MINPS256,
24745 IX86_BUILTIN_MULPD256,
24746 IX86_BUILTIN_MULPS256,
24747 IX86_BUILTIN_ORPD256,
24748 IX86_BUILTIN_ORPS256,
24749 IX86_BUILTIN_SHUFPD256,
24750 IX86_BUILTIN_SHUFPS256,
24751 IX86_BUILTIN_SUBPD256,
24752 IX86_BUILTIN_SUBPS256,
24753 IX86_BUILTIN_XORPD256,
24754 IX86_BUILTIN_XORPS256,
24755 IX86_BUILTIN_CMPSD,
24756 IX86_BUILTIN_CMPSS,
24757 IX86_BUILTIN_CMPPD,
24758 IX86_BUILTIN_CMPPS,
24759 IX86_BUILTIN_CMPPD256,
24760 IX86_BUILTIN_CMPPS256,
24761 IX86_BUILTIN_CVTDQ2PD256,
24762 IX86_BUILTIN_CVTDQ2PS256,
24763 IX86_BUILTIN_CVTPD2PS256,
24764 IX86_BUILTIN_CVTPS2DQ256,
24765 IX86_BUILTIN_CVTPS2PD256,
24766 IX86_BUILTIN_CVTTPD2DQ256,
24767 IX86_BUILTIN_CVTPD2DQ256,
24768 IX86_BUILTIN_CVTTPS2DQ256,
24769 IX86_BUILTIN_EXTRACTF128PD256,
24770 IX86_BUILTIN_EXTRACTF128PS256,
24771 IX86_BUILTIN_EXTRACTF128SI256,
24772 IX86_BUILTIN_VZEROALL,
24773 IX86_BUILTIN_VZEROUPPER,
24774 IX86_BUILTIN_VPERMILVARPD,
24775 IX86_BUILTIN_VPERMILVARPS,
24776 IX86_BUILTIN_VPERMILVARPD256,
24777 IX86_BUILTIN_VPERMILVARPS256,
24778 IX86_BUILTIN_VPERMILPD,
24779 IX86_BUILTIN_VPERMILPS,
24780 IX86_BUILTIN_VPERMILPD256,
24781 IX86_BUILTIN_VPERMILPS256,
24782 IX86_BUILTIN_VPERMIL2PD,
24783 IX86_BUILTIN_VPERMIL2PS,
24784 IX86_BUILTIN_VPERMIL2PD256,
24785 IX86_BUILTIN_VPERMIL2PS256,
24786 IX86_BUILTIN_VPERM2F128PD256,
24787 IX86_BUILTIN_VPERM2F128PS256,
24788 IX86_BUILTIN_VPERM2F128SI256,
24789 IX86_BUILTIN_VBROADCASTSS,
24790 IX86_BUILTIN_VBROADCASTSD256,
24791 IX86_BUILTIN_VBROADCASTSS256,
24792 IX86_BUILTIN_VBROADCASTPD256,
24793 IX86_BUILTIN_VBROADCASTPS256,
24794 IX86_BUILTIN_VINSERTF128PD256,
24795 IX86_BUILTIN_VINSERTF128PS256,
24796 IX86_BUILTIN_VINSERTF128SI256,
24797 IX86_BUILTIN_LOADUPD256,
24798 IX86_BUILTIN_LOADUPS256,
24799 IX86_BUILTIN_STOREUPD256,
24800 IX86_BUILTIN_STOREUPS256,
24801 IX86_BUILTIN_LDDQU256,
24802 IX86_BUILTIN_MOVNTDQ256,
24803 IX86_BUILTIN_MOVNTPD256,
24804 IX86_BUILTIN_MOVNTPS256,
24805 IX86_BUILTIN_LOADDQU256,
24806 IX86_BUILTIN_STOREDQU256,
24807 IX86_BUILTIN_MASKLOADPD,
24808 IX86_BUILTIN_MASKLOADPS,
24809 IX86_BUILTIN_MASKSTOREPD,
24810 IX86_BUILTIN_MASKSTOREPS,
24811 IX86_BUILTIN_MASKLOADPD256,
24812 IX86_BUILTIN_MASKLOADPS256,
24813 IX86_BUILTIN_MASKSTOREPD256,
24814 IX86_BUILTIN_MASKSTOREPS256,
24815 IX86_BUILTIN_MOVSHDUP256,
24816 IX86_BUILTIN_MOVSLDUP256,
24817 IX86_BUILTIN_MOVDDUP256,
24819 IX86_BUILTIN_SQRTPD256,
24820 IX86_BUILTIN_SQRTPS256,
24821 IX86_BUILTIN_SQRTPS_NR256,
24822 IX86_BUILTIN_RSQRTPS256,
24823 IX86_BUILTIN_RSQRTPS_NR256,
24825 IX86_BUILTIN_RCPPS256,
24827 IX86_BUILTIN_ROUNDPD256,
24828 IX86_BUILTIN_ROUNDPS256,
24830 IX86_BUILTIN_FLOORPD256,
24831 IX86_BUILTIN_CEILPD256,
24832 IX86_BUILTIN_TRUNCPD256,
24833 IX86_BUILTIN_RINTPD256,
24834 IX86_BUILTIN_ROUNDPD_AZ256,
24835 IX86_BUILTIN_FLOORPS256,
24836 IX86_BUILTIN_CEILPS256,
24837 IX86_BUILTIN_TRUNCPS256,
24838 IX86_BUILTIN_RINTPS256,
24839 IX86_BUILTIN_ROUNDPS_AZ256,
24841 IX86_BUILTIN_UNPCKHPD256,
24842 IX86_BUILTIN_UNPCKLPD256,
24843 IX86_BUILTIN_UNPCKHPS256,
24844 IX86_BUILTIN_UNPCKLPS256,
24846 IX86_BUILTIN_SI256_SI,
24847 IX86_BUILTIN_PS256_PS,
24848 IX86_BUILTIN_PD256_PD,
24849 IX86_BUILTIN_SI_SI256,
24850 IX86_BUILTIN_PS_PS256,
24851 IX86_BUILTIN_PD_PD256,
24853 IX86_BUILTIN_VTESTZPD,
24854 IX86_BUILTIN_VTESTCPD,
24855 IX86_BUILTIN_VTESTNZCPD,
24856 IX86_BUILTIN_VTESTZPS,
24857 IX86_BUILTIN_VTESTCPS,
24858 IX86_BUILTIN_VTESTNZCPS,
24859 IX86_BUILTIN_VTESTZPD256,
24860 IX86_BUILTIN_VTESTCPD256,
24861 IX86_BUILTIN_VTESTNZCPD256,
24862 IX86_BUILTIN_VTESTZPS256,
24863 IX86_BUILTIN_VTESTCPS256,
24864 IX86_BUILTIN_VTESTNZCPS256,
24865 IX86_BUILTIN_PTESTZ256,
24866 IX86_BUILTIN_PTESTC256,
24867 IX86_BUILTIN_PTESTNZC256,
24869 IX86_BUILTIN_MOVMSKPD256,
24870 IX86_BUILTIN_MOVMSKPS256,
24872 /* AVX2 */
24873 IX86_BUILTIN_MPSADBW256,
24874 IX86_BUILTIN_PABSB256,
24875 IX86_BUILTIN_PABSW256,
24876 IX86_BUILTIN_PABSD256,
24877 IX86_BUILTIN_PACKSSDW256,
24878 IX86_BUILTIN_PACKSSWB256,
24879 IX86_BUILTIN_PACKUSDW256,
24880 IX86_BUILTIN_PACKUSWB256,
24881 IX86_BUILTIN_PADDB256,
24882 IX86_BUILTIN_PADDW256,
24883 IX86_BUILTIN_PADDD256,
24884 IX86_BUILTIN_PADDQ256,
24885 IX86_BUILTIN_PADDSB256,
24886 IX86_BUILTIN_PADDSW256,
24887 IX86_BUILTIN_PADDUSB256,
24888 IX86_BUILTIN_PADDUSW256,
24889 IX86_BUILTIN_PALIGNR256,
24890 IX86_BUILTIN_AND256I,
24891 IX86_BUILTIN_ANDNOT256I,
24892 IX86_BUILTIN_PAVGB256,
24893 IX86_BUILTIN_PAVGW256,
24894 IX86_BUILTIN_PBLENDVB256,
24895 IX86_BUILTIN_PBLENDVW256,
24896 IX86_BUILTIN_PCMPEQB256,
24897 IX86_BUILTIN_PCMPEQW256,
24898 IX86_BUILTIN_PCMPEQD256,
24899 IX86_BUILTIN_PCMPEQQ256,
24900 IX86_BUILTIN_PCMPGTB256,
24901 IX86_BUILTIN_PCMPGTW256,
24902 IX86_BUILTIN_PCMPGTD256,
24903 IX86_BUILTIN_PCMPGTQ256,
24904 IX86_BUILTIN_PHADDW256,
24905 IX86_BUILTIN_PHADDD256,
24906 IX86_BUILTIN_PHADDSW256,
24907 IX86_BUILTIN_PHSUBW256,
24908 IX86_BUILTIN_PHSUBD256,
24909 IX86_BUILTIN_PHSUBSW256,
24910 IX86_BUILTIN_PMADDUBSW256,
24911 IX86_BUILTIN_PMADDWD256,
24912 IX86_BUILTIN_PMAXSB256,
24913 IX86_BUILTIN_PMAXSW256,
24914 IX86_BUILTIN_PMAXSD256,
24915 IX86_BUILTIN_PMAXUB256,
24916 IX86_BUILTIN_PMAXUW256,
24917 IX86_BUILTIN_PMAXUD256,
24918 IX86_BUILTIN_PMINSB256,
24919 IX86_BUILTIN_PMINSW256,
24920 IX86_BUILTIN_PMINSD256,
24921 IX86_BUILTIN_PMINUB256,
24922 IX86_BUILTIN_PMINUW256,
24923 IX86_BUILTIN_PMINUD256,
24924 IX86_BUILTIN_PMOVMSKB256,
24925 IX86_BUILTIN_PMOVSXBW256,
24926 IX86_BUILTIN_PMOVSXBD256,
24927 IX86_BUILTIN_PMOVSXBQ256,
24928 IX86_BUILTIN_PMOVSXWD256,
24929 IX86_BUILTIN_PMOVSXWQ256,
24930 IX86_BUILTIN_PMOVSXDQ256,
24931 IX86_BUILTIN_PMOVZXBW256,
24932 IX86_BUILTIN_PMOVZXBD256,
24933 IX86_BUILTIN_PMOVZXBQ256,
24934 IX86_BUILTIN_PMOVZXWD256,
24935 IX86_BUILTIN_PMOVZXWQ256,
24936 IX86_BUILTIN_PMOVZXDQ256,
24937 IX86_BUILTIN_PMULDQ256,
24938 IX86_BUILTIN_PMULHRSW256,
24939 IX86_BUILTIN_PMULHUW256,
24940 IX86_BUILTIN_PMULHW256,
24941 IX86_BUILTIN_PMULLW256,
24942 IX86_BUILTIN_PMULLD256,
24943 IX86_BUILTIN_PMULUDQ256,
24944 IX86_BUILTIN_POR256,
24945 IX86_BUILTIN_PSADBW256,
24946 IX86_BUILTIN_PSHUFB256,
24947 IX86_BUILTIN_PSHUFD256,
24948 IX86_BUILTIN_PSHUFHW256,
24949 IX86_BUILTIN_PSHUFLW256,
24950 IX86_BUILTIN_PSIGNB256,
24951 IX86_BUILTIN_PSIGNW256,
24952 IX86_BUILTIN_PSIGND256,
24953 IX86_BUILTIN_PSLLDQI256,
24954 IX86_BUILTIN_PSLLWI256,
24955 IX86_BUILTIN_PSLLW256,
24956 IX86_BUILTIN_PSLLDI256,
24957 IX86_BUILTIN_PSLLD256,
24958 IX86_BUILTIN_PSLLQI256,
24959 IX86_BUILTIN_PSLLQ256,
24960 IX86_BUILTIN_PSRAWI256,
24961 IX86_BUILTIN_PSRAW256,
24962 IX86_BUILTIN_PSRADI256,
24963 IX86_BUILTIN_PSRAD256,
24964 IX86_BUILTIN_PSRLDQI256,
24965 IX86_BUILTIN_PSRLWI256,
24966 IX86_BUILTIN_PSRLW256,
24967 IX86_BUILTIN_PSRLDI256,
24968 IX86_BUILTIN_PSRLD256,
24969 IX86_BUILTIN_PSRLQI256,
24970 IX86_BUILTIN_PSRLQ256,
24971 IX86_BUILTIN_PSUBB256,
24972 IX86_BUILTIN_PSUBW256,
24973 IX86_BUILTIN_PSUBD256,
24974 IX86_BUILTIN_PSUBQ256,
24975 IX86_BUILTIN_PSUBSB256,
24976 IX86_BUILTIN_PSUBSW256,
24977 IX86_BUILTIN_PSUBUSB256,
24978 IX86_BUILTIN_PSUBUSW256,
24979 IX86_BUILTIN_PUNPCKHBW256,
24980 IX86_BUILTIN_PUNPCKHWD256,
24981 IX86_BUILTIN_PUNPCKHDQ256,
24982 IX86_BUILTIN_PUNPCKHQDQ256,
24983 IX86_BUILTIN_PUNPCKLBW256,
24984 IX86_BUILTIN_PUNPCKLWD256,
24985 IX86_BUILTIN_PUNPCKLDQ256,
24986 IX86_BUILTIN_PUNPCKLQDQ256,
24987 IX86_BUILTIN_PXOR256,
24988 IX86_BUILTIN_MOVNTDQA256,
24989 IX86_BUILTIN_VBROADCASTSS_PS,
24990 IX86_BUILTIN_VBROADCASTSS_PS256,
24991 IX86_BUILTIN_VBROADCASTSD_PD256,
24992 IX86_BUILTIN_VBROADCASTSI256,
24993 IX86_BUILTIN_PBLENDD256,
24994 IX86_BUILTIN_PBLENDD128,
24995 IX86_BUILTIN_PBROADCASTB256,
24996 IX86_BUILTIN_PBROADCASTW256,
24997 IX86_BUILTIN_PBROADCASTD256,
24998 IX86_BUILTIN_PBROADCASTQ256,
24999 IX86_BUILTIN_PBROADCASTB128,
25000 IX86_BUILTIN_PBROADCASTW128,
25001 IX86_BUILTIN_PBROADCASTD128,
25002 IX86_BUILTIN_PBROADCASTQ128,
25003 IX86_BUILTIN_VPERMVARSI256,
25004 IX86_BUILTIN_VPERMDF256,
25005 IX86_BUILTIN_VPERMVARSF256,
25006 IX86_BUILTIN_VPERMDI256,
25007 IX86_BUILTIN_VPERMTI256,
25008 IX86_BUILTIN_VEXTRACT128I256,
25009 IX86_BUILTIN_VINSERT128I256,
25010 IX86_BUILTIN_MASKLOADD,
25011 IX86_BUILTIN_MASKLOADQ,
25012 IX86_BUILTIN_MASKLOADD256,
25013 IX86_BUILTIN_MASKLOADQ256,
25014 IX86_BUILTIN_MASKSTORED,
25015 IX86_BUILTIN_MASKSTOREQ,
25016 IX86_BUILTIN_MASKSTORED256,
25017 IX86_BUILTIN_MASKSTOREQ256,
25018 IX86_BUILTIN_PSLLVV4DI,
25019 IX86_BUILTIN_PSLLVV2DI,
25020 IX86_BUILTIN_PSLLVV8SI,
25021 IX86_BUILTIN_PSLLVV4SI,
25022 IX86_BUILTIN_PSRAVV8SI,
25023 IX86_BUILTIN_PSRAVV4SI,
25024 IX86_BUILTIN_PSRLVV4DI,
25025 IX86_BUILTIN_PSRLVV2DI,
25026 IX86_BUILTIN_PSRLVV8SI,
25027 IX86_BUILTIN_PSRLVV4SI,
25029 IX86_BUILTIN_GATHERSIV2DF,
25030 IX86_BUILTIN_GATHERSIV4DF,
25031 IX86_BUILTIN_GATHERDIV2DF,
25032 IX86_BUILTIN_GATHERDIV4DF,
25033 IX86_BUILTIN_GATHERSIV4SF,
25034 IX86_BUILTIN_GATHERSIV8SF,
25035 IX86_BUILTIN_GATHERDIV4SF,
25036 IX86_BUILTIN_GATHERDIV8SF,
25037 IX86_BUILTIN_GATHERSIV2DI,
25038 IX86_BUILTIN_GATHERSIV4DI,
25039 IX86_BUILTIN_GATHERDIV2DI,
25040 IX86_BUILTIN_GATHERDIV4DI,
25041 IX86_BUILTIN_GATHERSIV4SI,
25042 IX86_BUILTIN_GATHERSIV8SI,
25043 IX86_BUILTIN_GATHERDIV4SI,
25044 IX86_BUILTIN_GATHERDIV8SI,
25046 /* TFmode support builtins. */
25047 IX86_BUILTIN_INFQ,
25048 IX86_BUILTIN_HUGE_VALQ,
25049 IX86_BUILTIN_FABSQ,
25050 IX86_BUILTIN_COPYSIGNQ,
25052 /* Vectorizer support builtins. */
25053 IX86_BUILTIN_CPYSGNPS,
25054 IX86_BUILTIN_CPYSGNPD,
25055 IX86_BUILTIN_CPYSGNPS256,
25056 IX86_BUILTIN_CPYSGNPD256,
25058 IX86_BUILTIN_CVTUDQ2PS,
25060 /* FMA4 instructions. */
25061 IX86_BUILTIN_VFMADDSS,
25062 IX86_BUILTIN_VFMADDSD,
25063 IX86_BUILTIN_VFMADDPS,
25064 IX86_BUILTIN_VFMADDPD,
25065 IX86_BUILTIN_VFMADDPS256,
25066 IX86_BUILTIN_VFMADDPD256,
25067 IX86_BUILTIN_VFMADDSUBPS,
25068 IX86_BUILTIN_VFMADDSUBPD,
25069 IX86_BUILTIN_VFMADDSUBPS256,
25070 IX86_BUILTIN_VFMADDSUBPD256,
25072 /* FMA3 instructions. */
25073 IX86_BUILTIN_VFMADDSS3,
25074 IX86_BUILTIN_VFMADDSD3,
25076 /* XOP instructions. */
25077 IX86_BUILTIN_VPCMOV,
25078 IX86_BUILTIN_VPCMOV_V2DI,
25079 IX86_BUILTIN_VPCMOV_V4SI,
25080 IX86_BUILTIN_VPCMOV_V8HI,
25081 IX86_BUILTIN_VPCMOV_V16QI,
25082 IX86_BUILTIN_VPCMOV_V4SF,
25083 IX86_BUILTIN_VPCMOV_V2DF,
25084 IX86_BUILTIN_VPCMOV256,
25085 IX86_BUILTIN_VPCMOV_V4DI256,
25086 IX86_BUILTIN_VPCMOV_V8SI256,
25087 IX86_BUILTIN_VPCMOV_V16HI256,
25088 IX86_BUILTIN_VPCMOV_V32QI256,
25089 IX86_BUILTIN_VPCMOV_V8SF256,
25090 IX86_BUILTIN_VPCMOV_V4DF256,
25092 IX86_BUILTIN_VPPERM,
25094 IX86_BUILTIN_VPMACSSWW,
25095 IX86_BUILTIN_VPMACSWW,
25096 IX86_BUILTIN_VPMACSSWD,
25097 IX86_BUILTIN_VPMACSWD,
25098 IX86_BUILTIN_VPMACSSDD,
25099 IX86_BUILTIN_VPMACSDD,
25100 IX86_BUILTIN_VPMACSSDQL,
25101 IX86_BUILTIN_VPMACSSDQH,
25102 IX86_BUILTIN_VPMACSDQL,
25103 IX86_BUILTIN_VPMACSDQH,
25104 IX86_BUILTIN_VPMADCSSWD,
25105 IX86_BUILTIN_VPMADCSWD,
25107 IX86_BUILTIN_VPHADDBW,
25108 IX86_BUILTIN_VPHADDBD,
25109 IX86_BUILTIN_VPHADDBQ,
25110 IX86_BUILTIN_VPHADDWD,
25111 IX86_BUILTIN_VPHADDWQ,
25112 IX86_BUILTIN_VPHADDDQ,
25113 IX86_BUILTIN_VPHADDUBW,
25114 IX86_BUILTIN_VPHADDUBD,
25115 IX86_BUILTIN_VPHADDUBQ,
25116 IX86_BUILTIN_VPHADDUWD,
25117 IX86_BUILTIN_VPHADDUWQ,
25118 IX86_BUILTIN_VPHADDUDQ,
25119 IX86_BUILTIN_VPHSUBBW,
25120 IX86_BUILTIN_VPHSUBWD,
25121 IX86_BUILTIN_VPHSUBDQ,
25123 IX86_BUILTIN_VPROTB,
25124 IX86_BUILTIN_VPROTW,
25125 IX86_BUILTIN_VPROTD,
25126 IX86_BUILTIN_VPROTQ,
25127 IX86_BUILTIN_VPROTB_IMM,
25128 IX86_BUILTIN_VPROTW_IMM,
25129 IX86_BUILTIN_VPROTD_IMM,
25130 IX86_BUILTIN_VPROTQ_IMM,
25132 IX86_BUILTIN_VPSHLB,
25133 IX86_BUILTIN_VPSHLW,
25134 IX86_BUILTIN_VPSHLD,
25135 IX86_BUILTIN_VPSHLQ,
25136 IX86_BUILTIN_VPSHAB,
25137 IX86_BUILTIN_VPSHAW,
25138 IX86_BUILTIN_VPSHAD,
25139 IX86_BUILTIN_VPSHAQ,
25141 IX86_BUILTIN_VFRCZSS,
25142 IX86_BUILTIN_VFRCZSD,
25143 IX86_BUILTIN_VFRCZPS,
25144 IX86_BUILTIN_VFRCZPD,
25145 IX86_BUILTIN_VFRCZPS256,
25146 IX86_BUILTIN_VFRCZPD256,
25148 IX86_BUILTIN_VPCOMEQUB,
25149 IX86_BUILTIN_VPCOMNEUB,
25150 IX86_BUILTIN_VPCOMLTUB,
25151 IX86_BUILTIN_VPCOMLEUB,
25152 IX86_BUILTIN_VPCOMGTUB,
25153 IX86_BUILTIN_VPCOMGEUB,
25154 IX86_BUILTIN_VPCOMFALSEUB,
25155 IX86_BUILTIN_VPCOMTRUEUB,
25157 IX86_BUILTIN_VPCOMEQUW,
25158 IX86_BUILTIN_VPCOMNEUW,
25159 IX86_BUILTIN_VPCOMLTUW,
25160 IX86_BUILTIN_VPCOMLEUW,
25161 IX86_BUILTIN_VPCOMGTUW,
25162 IX86_BUILTIN_VPCOMGEUW,
25163 IX86_BUILTIN_VPCOMFALSEUW,
25164 IX86_BUILTIN_VPCOMTRUEUW,
25166 IX86_BUILTIN_VPCOMEQUD,
25167 IX86_BUILTIN_VPCOMNEUD,
25168 IX86_BUILTIN_VPCOMLTUD,
25169 IX86_BUILTIN_VPCOMLEUD,
25170 IX86_BUILTIN_VPCOMGTUD,
25171 IX86_BUILTIN_VPCOMGEUD,
25172 IX86_BUILTIN_VPCOMFALSEUD,
25173 IX86_BUILTIN_VPCOMTRUEUD,
25175 IX86_BUILTIN_VPCOMEQUQ,
25176 IX86_BUILTIN_VPCOMNEUQ,
25177 IX86_BUILTIN_VPCOMLTUQ,
25178 IX86_BUILTIN_VPCOMLEUQ,
25179 IX86_BUILTIN_VPCOMGTUQ,
25180 IX86_BUILTIN_VPCOMGEUQ,
25181 IX86_BUILTIN_VPCOMFALSEUQ,
25182 IX86_BUILTIN_VPCOMTRUEUQ,
25184 IX86_BUILTIN_VPCOMEQB,
25185 IX86_BUILTIN_VPCOMNEB,
25186 IX86_BUILTIN_VPCOMLTB,
25187 IX86_BUILTIN_VPCOMLEB,
25188 IX86_BUILTIN_VPCOMGTB,
25189 IX86_BUILTIN_VPCOMGEB,
25190 IX86_BUILTIN_VPCOMFALSEB,
25191 IX86_BUILTIN_VPCOMTRUEB,
25193 IX86_BUILTIN_VPCOMEQW,
25194 IX86_BUILTIN_VPCOMNEW,
25195 IX86_BUILTIN_VPCOMLTW,
25196 IX86_BUILTIN_VPCOMLEW,
25197 IX86_BUILTIN_VPCOMGTW,
25198 IX86_BUILTIN_VPCOMGEW,
25199 IX86_BUILTIN_VPCOMFALSEW,
25200 IX86_BUILTIN_VPCOMTRUEW,
25202 IX86_BUILTIN_VPCOMEQD,
25203 IX86_BUILTIN_VPCOMNED,
25204 IX86_BUILTIN_VPCOMLTD,
25205 IX86_BUILTIN_VPCOMLED,
25206 IX86_BUILTIN_VPCOMGTD,
25207 IX86_BUILTIN_VPCOMGED,
25208 IX86_BUILTIN_VPCOMFALSED,
25209 IX86_BUILTIN_VPCOMTRUED,
25211 IX86_BUILTIN_VPCOMEQQ,
25212 IX86_BUILTIN_VPCOMNEQ,
25213 IX86_BUILTIN_VPCOMLTQ,
25214 IX86_BUILTIN_VPCOMLEQ,
25215 IX86_BUILTIN_VPCOMGTQ,
25216 IX86_BUILTIN_VPCOMGEQ,
25217 IX86_BUILTIN_VPCOMFALSEQ,
25218 IX86_BUILTIN_VPCOMTRUEQ,
25220 /* LWP instructions. */
25221 IX86_BUILTIN_LLWPCB,
25222 IX86_BUILTIN_SLWPCB,
25223 IX86_BUILTIN_LWPVAL32,
25224 IX86_BUILTIN_LWPVAL64,
25225 IX86_BUILTIN_LWPINS32,
25226 IX86_BUILTIN_LWPINS64,
25228 IX86_BUILTIN_CLZS,
25230 /* BMI instructions.  */
25231 IX86_BUILTIN_BEXTR32,
25232 IX86_BUILTIN_BEXTR64,
25233 IX86_BUILTIN_CTZS,
25235 /* TBM instructions.  */
25236 IX86_BUILTIN_BEXTRI32,
25237 IX86_BUILTIN_BEXTRI64,
25239 /* BMI2 instructions. */
25240 IX86_BUILTIN_BZHI32,
25241 IX86_BUILTIN_BZHI64,
25242 IX86_BUILTIN_PDEP32,
25243 IX86_BUILTIN_PDEP64,
25244 IX86_BUILTIN_PEXT32,
25245 IX86_BUILTIN_PEXT64,
25247 /* FSGSBASE instructions. */
25248 IX86_BUILTIN_RDFSBASE32,
25249 IX86_BUILTIN_RDFSBASE64,
25250 IX86_BUILTIN_RDGSBASE32,
25251 IX86_BUILTIN_RDGSBASE64,
25252 IX86_BUILTIN_WRFSBASE32,
25253 IX86_BUILTIN_WRFSBASE64,
25254 IX86_BUILTIN_WRGSBASE32,
25255 IX86_BUILTIN_WRGSBASE64,
25257 /* RDRND instructions. */
25258 IX86_BUILTIN_RDRAND16_STEP,
25259 IX86_BUILTIN_RDRAND32_STEP,
25260 IX86_BUILTIN_RDRAND64_STEP,
25262 /* F16C instructions. */
25263 IX86_BUILTIN_CVTPH2PS,
25264 IX86_BUILTIN_CVTPH2PS256,
25265 IX86_BUILTIN_CVTPS2PH,
25266 IX86_BUILTIN_CVTPS2PH256,
25268 /* CFString built-in for darwin */
25269 IX86_BUILTIN_CFSTRING,
25271 IX86_BUILTIN_MAX
25272 };
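/* Editor's note: each enumerator above is paired with a user-visible
   __builtin_ia32_* function by the def_builtin calls further down in this
   file.  For example (illustrative user code compiled with -msse, not
   part of GCC itself), IX86_BUILTIN_ADDPS surfaces as:

     typedef float v4sf __attribute__ ((vector_size (16)));

     v4sf
     add4 (v4sf a, v4sf b)
     {
       return __builtin_ia32_addps (a, b);
     }
*/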
25274 /* Table for the ix86 builtin decls. */
25275 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
25277 /* Table of all of the builtin functions that are possible with different ISAs
25278    but are waiting to be built until a function is declared to use that
25279    ISA.  */
25280 struct builtin_isa {
25281   const char *name;                  /* function name */
25282   enum ix86_builtin_func_type tcode; /* type to use in the declaration */
25283   HOST_WIDE_INT isa;                 /* isa_flags this builtin is defined for */
25284   bool const_p;                      /* true if the declaration is constant */
25285   bool set_and_not_built_p;
25286 };
25288 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
25291 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
25292    of which isa_flags to use in the ix86_builtins_isa array.  Stores the
25293    function decl in the ix86_builtins array.  Returns the function decl or
25294    NULL_TREE, if the builtin was not added.
25296    If the front end has a special hook for builtin functions, delay adding
25297    builtin functions that aren't in the current ISA until the ISA is changed
25298    with function specific optimization.  Doing so can save about 300K for the
25299    default compiler.  When the builtin is expanded, check at that time whether
25300    it is valid.
25302    If the front end doesn't have a special hook, record all builtins, even if
25303    they aren't in the current ISA, in case the user uses function specific
25304    options for a different ISA, so that we don't get scope errors if a
25305    builtin is added in the middle of a function scope.  */
25307 static inline tree
25308 def_builtin (HOST_WIDE_INT mask, const char *name,
25309              enum ix86_builtin_func_type tcode,
25310              enum ix86_builtins code)
25311 {
25312   tree decl = NULL_TREE;
25314   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
25315     {
25316       ix86_builtins_isa[(int) code].isa = mask;
25318       mask &= ~OPTION_MASK_ISA_64BIT;
25319       if (mask == 0
25320           || (mask & ix86_isa_flags) != 0
25321           || (lang_hooks.builtin_function
25322               == lang_hooks.builtin_function_ext_scope))
25324         {
25325           tree type = ix86_get_builtin_func_type (tcode);
25326           decl = add_builtin_function (name, type, code, BUILT_IN_MD,
25327                                        NULL, NULL_TREE);
25328           ix86_builtins[(int) code] = decl;
25329           ix86_builtins_isa[(int) code].set_and_not_built_p = false;
25330         }
25331       else
25332         {
25333           ix86_builtins[(int) code] = NULL_TREE;
25334           ix86_builtins_isa[(int) code].tcode = tcode;
25335           ix86_builtins_isa[(int) code].name = name;
25336           ix86_builtins_isa[(int) code].const_p = false;
25337           ix86_builtins_isa[(int) code].set_and_not_built_p = true;
25338         }
25339     }
25341   return decl;
25342 }
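/* For reference, a typical registration later in this file (in
   ix86_init_mmx_sse_builtins) looks roughly like:

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
                  VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);

   With -msse in effect the decl is built immediately; otherwise only the
   ix86_builtins_isa[] slot is filled in and the decl is deferred.  */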
25344 /* Like def_builtin, but also marks the function decl "const". */
25346 static inline tree
25347 def_builtin_const (HOST_WIDE_INT mask, const char *name,
25348                    enum ix86_builtin_func_type tcode, enum ix86_builtins code)
25349 {
25350   tree decl = def_builtin (mask, name, tcode, code);
25351   if (decl)
25352     TREE_READONLY (decl) = 1;
25353   else
25354     ix86_builtins_isa[(int) code].const_p = true;
25356   return decl;
25357 }
25359 /* Add any new builtin functions for a given ISA that may not have been
25360 declared. This saves a bit of space compared to adding all of the
25361 declarations to the tree, even if we didn't use them. */
25363 static void
25364 ix86_add_new_builtins (HOST_WIDE_INT isa)
25365 {
25366   int i;
25368   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
25369     {
25370       if ((ix86_builtins_isa[i].isa & isa) != 0
25371           && ix86_builtins_isa[i].set_and_not_built_p)
25372         {
25373           tree decl, type;
25375           /* Don't define the builtin again.  */
25376           ix86_builtins_isa[i].set_and_not_built_p = false;
25378           type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
25379           decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
25380                                                  type, i, BUILT_IN_MD, NULL,
25381                                                  NULL_TREE);
25383           ix86_builtins[i] = decl;
25384           if (ix86_builtins_isa[i].const_p)
25385             TREE_READONLY (decl) = 1;
25386         }
25387     }
25388 }
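/* Editor's sketch of the scenario this handles (illustrative user code,
   not GCC source): builtins deferred by def_builtin become available once
   their ISA is enabled mid-compilation, e.g.

     #pragma GCC push_options
     #pragma GCC target ("avx")
     void f (void) { __builtin_ia32_vzeroupper (); }
     #pragma GCC pop_options

   The target pragma/attribute machinery recomputes isa_flags and calls
   ix86_add_new_builtins with the newly enabled bits, materializing the
   matching decls at file scope.  */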
25390 /* Bits for builtin_description.flag. */
25392 /* Set when we don't support the comparison natively, and should
25393 swap_comparison in order to support it. */
25394 #define BUILTIN_DESC_SWAP_OPERANDS 1
25396 struct builtin_description
25397 {
25398   const HOST_WIDE_INT mask;
25399   const enum insn_code icode;
25400   const char *const name;
25401   const enum ix86_builtins code;
25402   const enum rtx_code comparison;
25403   const int flag;
25404 };
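/* Editor's note on reading the rows below: in the first bdesc_comi entry,
   OPTION_MASK_ISA_SSE gates availability, CODE_FOR_sse_comi names the
   insn pattern to expand, "__builtin_ia32_comieq" is the user-visible
   name, IX86_BUILTIN_COMIEQSS is the enum code above, UNEQ is the rtx
   comparison to synthesize, and 0 is the flag word (set to
   BUILTIN_DESC_SWAP_OPERANDS when the operands must be swapped to reach a
   natively supported comparison).  */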
25406 static const struct builtin_description bdesc_comi[] =
25407 {
25408 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
25409 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
25410 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
25411 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
25412 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
25413 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
25414 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
25415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
25416 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
25417 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
25418 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
25419 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
25420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
25421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
25422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
25423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
25424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
25425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
25426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
25427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
25428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
25429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
25430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
25431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
25432 };
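/* Illustrative user-level view of the comi rows above (editor's sketch,
   compiled with -msse, not GCC source):

     typedef float v4sf __attribute__ ((vector_size (16)));

     int
     first_elts_equal (v4sf a, v4sf b)
     {
       return __builtin_ia32_comieq (a, b);
     }

   This expands through CODE_FOR_sse_comi to a comiss followed by a flags
   test derived from the UNEQ comparison code in the table.  */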
25434 static const struct builtin_description bdesc_pcmpestr[] =
25435 {
25436   /* SSE4.2 */
25437 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
25438 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
25439 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
25440 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
25441 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
25442 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
25443 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
25444 };
25446 static const struct builtin_description bdesc_pcmpistr[] =
25447 {
25448   /* SSE4.2 */
25449 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
25450 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
25451 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
25452 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
25453 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
25454 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
25455 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
25456 };
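/* Illustrative use of the pcmpistr rows above (editor's sketch; user
   code would normally reach these through <smmintrin.h>'s _mm_cmpistri
   family, compiled with -msse4.2):

     typedef char v16qi __attribute__ ((vector_size (16)));

     int
     find_any_byte (v16qi needle, v16qi haystack)
     {
       return __builtin_ia32_pcmpistri128 (needle, haystack, 0);
     }

   The last operand is the immediate control byte and must be a
   compile-time constant; 0 selects unsigned-byte, equal-any mode.  */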
25458 /* Special builtins with variable number of arguments. */
25459 static const struct builtin_description bdesc_special_args[] =
25460 {
25461 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
25462 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
25463 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
25465 /* MMX */
25466 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
25468 /* 3DNow! */
25469 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
25471 /* SSE */
25472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25473 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
25476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
25477 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
25478 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
25479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
25481 /* SSE or 3DNow!A */
25482 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
25483 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
25485 /* SSE2 */
25486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
25487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
25488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
25489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
25490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
25491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
25492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
25493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
25494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
25496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
25497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
25499 /* SSE3 */
25500 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
25502 /* SSE4.1 */
25503 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
25505 /* SSE4A */
25506 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
25507 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25509 /* AVX */
25510 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
25511 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
25513 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
25514 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
25515 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
25516 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
25517 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
25519 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
25520 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
25521 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
25522 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
25523 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
25524 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
25525 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
25527 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
25528 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
25529 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
25531 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
25532 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
25533 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
25534 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
25535 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
25536 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
25537 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
25538 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
25540 /* AVX2 */
25541 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
25542 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
25543 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
25544 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
25545 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
25546 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
25547 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
25548 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
25549 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
25551 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
25552 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
25553 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
25554 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
25555 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
25556 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
25558 /* FSGSBASE */
25559 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
25560 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
25561 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
25562 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
25563 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
25564 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
25565 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
25566 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
25567 };
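/* Illustrative use of one of the special-args rows above (editor's
   sketch, compiled with -mavx, not GCC source):

     typedef double v2df __attribute__ ((vector_size (16)));
     typedef long long v2di __attribute__ ((vector_size (16)));

     v2df
     load_first_lane (const double *p)
     {
       v2di mask = { -1, 0 };
       return __builtin_ia32_maskloadpd ((const v2df *) p, mask);
     }

   Only lanes whose mask element has the sign bit set are loaded; the rest
   read as zero.  Such builtins touch memory, which is why they live in
   this "special" table rather than among the const bdesc_args below.  */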
25569 /* Builtins with variable number of arguments. */
25570 static const struct builtin_description bdesc_args[] =
25571 {
25572 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
25573 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
25574 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
25575 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
25576 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
25577 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
25578 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
25580 /* MMX */
25581 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25582 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25583 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25584 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25585 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25586 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25588 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25589 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25590 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25591 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25592 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25593 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25594 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25595 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25597 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25598 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25600 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25601 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25602 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25603 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25605 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25606 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25607 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25608 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25609 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25610 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25612 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25613 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25614 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25615 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25616 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
25617 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
25619 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
25620 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
25621 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
25623 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
25625 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
25626 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
25627 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
25628 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
25629 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
25630 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
25632 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
25633 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
25634 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
25635 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
25636 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
25637 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
25639 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
25640 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
25641 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
25642 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
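
  /* 3DNow!A */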
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
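
  /* SSE */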
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
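
  /* SSE2 */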
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
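
  /* SSE2 MMX */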
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
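
  /* SSE3 */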
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
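
  /* SSSE3 */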
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
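
  /* SSE4.1 */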
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
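
  /* SSE4.1 */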
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
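
  /* SSE4.2 */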
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
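
  /* SSE4A */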
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
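
  /* AES */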
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
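
  /* PCLMUL */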
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
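
  /* AVX */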
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
26152 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26153 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26154 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26155 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26157 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26159 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
26160 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
26162 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
26163 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
26164 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
26165 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
26167 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
26169 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
26170 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
26171 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
26172 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
26174 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26176 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26177 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26178 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26179 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26181 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
26182 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
26183 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
26184 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
26185 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
26186 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
26188 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
26189 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
26190 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
26191 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
26192 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
26193 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
26194 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
26195 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
26196 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
26197 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
26198 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
26199 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
26200 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
26201 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
26202 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
26204 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
26205 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
26207 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26208 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* BMI */
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* TBM */
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },

  /* BMI2 */
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};
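
/* A note on the conventions used in the table above (explanatory, not
   from the original sources): each CODE_FOR_* value names an insn
   pattern in the machine description.  Generic arithmetic patterns
   follow the optab scheme <op><mode>3 (CODE_FOR_addv32qi3, for
   instance, is the three-operand V32QImode add), while target-specific
   patterns carry a prefix such as avx2_, bmi2_ or xop_.  The final
   field encodes the builtin's C prototype; V16HI_FTYPE_V16HI_V16HI,
   for example, is a function taking two V16HI vectors and returning
   one.  */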

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
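
/* Illustrative note (not part of the original sources): the MULTI_ARG_*
   shorthands above simply alias ix86_builtin_func_type values so the
   bdesc_multi_arg rows below stay compact.  For example

     MULTI_ARG_3_SF == V4SF_FTYPE_V4SF_V4SF_V4SF

   describes a builtin with the C prototype "v4sf f (v4sf, v4sf, v4sf)",
   which is exactly the shape of the three-operand FMA4 entries that
   open the table.  */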
26438 static const struct builtin_description bdesc_multi_arg[] =
26440 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
26441 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
26442 UNKNOWN, (int)MULTI_ARG_3_SF },
26443 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
26444 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
26445 UNKNOWN, (int)MULTI_ARG_3_DF },
26447 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
26448 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
26449 UNKNOWN, (int)MULTI_ARG_3_SF },
26450 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
26451 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
26452 UNKNOWN, (int)MULTI_ARG_3_DF },
26454 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
26455 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
26456 UNKNOWN, (int)MULTI_ARG_3_SF },
26457 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
26458 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
26459 UNKNOWN, (int)MULTI_ARG_3_DF },
26460 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
26461 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
26462 UNKNOWN, (int)MULTI_ARG_3_SF2 },
26463 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
26464 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
26465 UNKNOWN, (int)MULTI_ARG_3_DF2 },
26467 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
26468 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
26469 UNKNOWN, (int)MULTI_ARG_3_SF },
26470 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
26471 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
26472 UNKNOWN, (int)MULTI_ARG_3_DF },
26473 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
26474 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
26475 UNKNOWN, (int)MULTI_ARG_3_SF2 },
26476 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
26477 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
26478 UNKNOWN, (int)MULTI_ARG_3_DF2 },
26480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
26481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
26482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
26483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
26484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
26485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
26486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
26488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
26489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
26490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
26491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
26492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
26493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
26494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
26496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
26498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
26499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
26500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
26501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
26502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
26503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
26504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
26509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
26511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
26512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
26513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
26514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
26515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
26516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
26517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
26518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
26519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
26520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
26521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
26522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
26523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
26524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
26525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
26526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
26528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
26529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
26530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
26531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
26532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
26533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
26535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
26536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
26537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
26538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
26539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
26540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
26541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
26542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
26543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
26544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
26545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
26546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
26547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
26548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
26549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
26551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
26552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
26553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
26554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
26555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
26556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
26557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
26559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
26560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
26561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
26562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
26563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
26564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
26565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
26567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
26568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
26569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
26570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
26571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
26572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
26573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
26575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
26576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
26577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
26578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
26579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
26580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
26581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
26583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
26584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
26585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
26586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
26587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
26588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
26589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
26591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
26592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
26593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
26594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
26595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
26596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
26597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
26599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
26600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
26601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
26602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
26603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
26604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
26605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
26607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
26608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
26609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
26610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
26611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
26612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
26613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
26615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
26616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
26617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
26618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
26619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
26620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
26621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
26622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
26624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
26625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
26626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
26627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
26628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
26629 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
26630 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
26631 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
26633 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
26634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
26635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
26636 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
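/* A user-level sketch of how the vpcom* table entries above surface to
   programs.  Assumptions: the _mm_comlt_epu16 wrapper name and its mapping
   to __builtin_ia32_vpcomltuw come from GCC's xopintrin.h; compile with
   -mxop.  */
#if 0
#include <x86intrin.h>

/* Per-lane unsigned 16-bit a < b; each true lane becomes all-ones.  */
__m128i
lanes_below (__m128i a, __m128i b)
{
  return _mm_comlt_epu16 (a, b);	/* __builtin_ia32_vpcomltuw */
}
#endif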
26640 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
26641 in the current target ISA to allow the user to compile particular modules
26642 with different target specific options that differ from the command line
26643 options.  */
26645 ix86_init_mmx_sse_builtins (void)
26647 const struct builtin_description * d;
26648 enum ix86_builtin_func_type ftype;
26651 /* Add all special builtins with variable number of operands. */
26652 for (i = 0, d = bdesc_special_args;
26653 i < ARRAY_SIZE (bdesc_special_args);
26654 i++, d++)
26655 {
26656 if (d->name == 0)
26657 continue;
26659 ftype = (enum ix86_builtin_func_type) d->flag;
26660 def_builtin (d->mask, d->name, ftype, d->code);
26663 /* Add all builtins with variable number of operands. */
26664 for (i = 0, d = bdesc_args;
26665 i < ARRAY_SIZE (bdesc_args);
26666 i++, d++)
26667 {
26668 if (d->name == 0)
26669 continue;
26671 ftype = (enum ix86_builtin_func_type) d->flag;
26672 def_builtin_const (d->mask, d->name, ftype, d->code);
26675 /* pcmpestr[im] insns. */
26676 for (i = 0, d = bdesc_pcmpestr;
26677 i < ARRAY_SIZE (bdesc_pcmpestr);
26678 i++, d++)
26679 {
26680 if (d->code == IX86_BUILTIN_PCMPESTRM128)
26681 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
26682 else
26683 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
26684 def_builtin_const (d->mask, d->name, ftype, d->code);
26687 /* pcmpistr[im] insns. */
26688 for (i = 0, d = bdesc_pcmpistr;
26689 i < ARRAY_SIZE (bdesc_pcmpistr);
26690 i++, d++)
26691 {
26692 if (d->code == IX86_BUILTIN_PCMPISTRM128)
26693 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
26694 else
26695 ftype = INT_FTYPE_V16QI_V16QI_INT;
26696 def_builtin_const (d->mask, d->name, ftype, d->code);
26699 /* comi/ucomi insns. */
26700 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
26702 if (d->mask == OPTION_MASK_ISA_SSE2)
26703 ftype = INT_FTYPE_V2DF_V2DF;
26704 else
26705 ftype = INT_FTYPE_V4SF_V4SF;
26706 def_builtin_const (d->mask, d->name, ftype, d->code);
26710 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
26711 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
26712 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
26713 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
26715 /* SSE or 3DNow!A */
26716 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
26717 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
26718 IX86_BUILTIN_MASKMOVQ);
26721 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
26722 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
26724 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
26725 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
26726 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
26727 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
26730 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
26731 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
26732 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
26733 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
26736 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
26737 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
26738 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
26739 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
26740 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
26741 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
26742 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
26743 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
26744 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
26745 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
26746 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
26747 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
26750 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
26751 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
26754 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
26755 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
26756 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
26757 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
26758 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
26759 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
26760 IX86_BUILTIN_RDRAND64_STEP);
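/* Usage sketch for the rdrand step builtins just defined: per the
   INT_FTYPE_P* signatures they return the carry flag (nonzero on success)
   and store the random value through the pointer.  Requires -mrdrnd.  */
#if 0
unsigned int
hw_random (void)
{
  unsigned int val;
  while (!__builtin_ia32_rdrand32_step (&val))
    ;	/* carry clear: no entropy available yet, retry */
  return val;
}
#endif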
26763 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
26764 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
26765 IX86_BUILTIN_GATHERSIV2DF);
26767 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
26768 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
26769 IX86_BUILTIN_GATHERSIV4DF);
26771 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
26772 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
26773 IX86_BUILTIN_GATHERDIV2DF);
26775 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
26776 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
26777 IX86_BUILTIN_GATHERDIV4DF);
26779 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
26780 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
26781 IX86_BUILTIN_GATHERSIV4SF);
26783 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
26784 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
26785 IX86_BUILTIN_GATHERSIV8SF);
26787 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
26788 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
26789 IX86_BUILTIN_GATHERDIV4SF);
26791 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
26792 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
26793 IX86_BUILTIN_GATHERDIV8SF);
26795 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
26796 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
26797 IX86_BUILTIN_GATHERSIV2DI);
26799 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
26800 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
26801 IX86_BUILTIN_GATHERSIV4DI);
26803 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
26804 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
26805 IX86_BUILTIN_GATHERDIV2DI);
26807 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
26808 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
26809 IX86_BUILTIN_GATHERDIV4DI);
26811 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
26812 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
26813 IX86_BUILTIN_GATHERSIV4SI);
26815 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
26816 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
26817 IX86_BUILTIN_GATHERSIV8SI);
26819 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
26820 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
26821 IX86_BUILTIN_GATHERDIV4SI);
26823 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
26824 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
26825 IX86_BUILTIN_GATHERDIV8SI);
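/* A rough scalar model of the per-lane semantics encoded by the gather
   signatures above, using __builtin_ia32_gathersiv4sf as the example: a
   lane is loaded only where the sign bit of the corresponding mask lane is
   set, otherwise the src lane is passed through.  SCALE is a byte scale
   (1, 2, 4 or 8).  The helper name is illustrative only.  */
#if 0
static void
gather_model (float dst[4], const float src[4], const void *base,
              const int index[4], const int mask[4], int scale)
{
  int i;
  for (i = 0; i < 4; i++)
    dst[i] = (mask[i] < 0)
             ? *(const float *) ((const char *) base
                                 + (long long) index[i] * scale)
             : src[i];
}
#endif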
26827 /* MMX access to the vec_init patterns. */
26828 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
26829 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
26831 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
26832 V4HI_FTYPE_HI_HI_HI_HI,
26833 IX86_BUILTIN_VEC_INIT_V4HI);
26835 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
26836 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
26837 IX86_BUILTIN_VEC_INIT_V8QI);
26839 /* Access to the vec_extract patterns. */
26840 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
26841 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
26842 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
26843 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
26844 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
26845 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
26846 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
26847 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
26848 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
26849 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
26851 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
26852 "__builtin_ia32_vec_ext_v4hi",
26853 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
26855 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
26856 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
26858 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
26859 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
26861 /* Access to the vec_set patterns. */
26862 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
26863 "__builtin_ia32_vec_set_v2di",
26864 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
26866 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
26867 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
26869 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
26870 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
26872 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
26873 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
26875 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
26876 "__builtin_ia32_vec_set_v4hi",
26877 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
26879 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
26880 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
26882 /* Add FMA4 multi-arg instructions.  */
26883 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
26884 {
26885 if (d->name == 0)
26886 continue;
26888 ftype = (enum ix86_builtin_func_type) d->flag;
26889 def_builtin_const (d->mask, d->name, ftype, d->code);
26893 /* Internal method for ix86_init_builtins. */
26896 ix86_init_builtins_va_builtins_abi (void)
26898 tree ms_va_ref, sysv_va_ref;
26899 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
26900 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
26901 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
26902 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
26906 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
26907 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
26908 ms_va_ref = build_reference_type (ms_va_list_type_node);
26909 sysv_va_ref =
26910 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
26912 fnvoid_va_end_ms =
26913 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
26914 fnvoid_va_start_ms =
26915 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
26916 fnvoid_va_end_sysv =
26917 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
26918 fnvoid_va_start_sysv =
26919 build_varargs_function_type_list (void_type_node, sysv_va_ref,
26920 NULL_TREE);
26921 fnvoid_va_copy_ms =
26922 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
26923 NULL_TREE);
26924 fnvoid_va_copy_sysv =
26925 build_function_type_list (void_type_node, sysv_va_ref,
26926 sysv_va_ref, NULL_TREE);
26928 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
26929 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
26930 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
26931 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
26932 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
26933 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
26934 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
26935 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
26936 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
26937 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
26938 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
26939 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
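/* Usage sketch for the builtins registered above (x86-64): an explicitly
   ms_abi variadic function walks its arguments with the ms_va builtins.
   Assumes __builtin_ms_va_list, the type name registered for
   ms_va_list_type_node, and that __builtin_va_arg accepts it.  */
#if 0
int __attribute__ ((ms_abi))
sum_ints (int count, ...)
{
  __builtin_ms_va_list ap;
  int i, total = 0;

  __builtin_ms_va_start (ap, count);
  for (i = 0; i < count; i++)
    total += __builtin_va_arg (ap, int);
  __builtin_ms_va_end (ap);
  return total;
}
#endif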
26943 ix86_init_builtin_types (void)
26945 tree float128_type_node, float80_type_node;
26947 /* The __float80 type. */
26948 float80_type_node = long_double_type_node;
26949 if (TYPE_MODE (float80_type_node) != XFmode)
26951 /* The __float80 type. */
26952 float80_type_node = make_node (REAL_TYPE);
26954 TYPE_PRECISION (float80_type_node) = 80;
26955 layout_type (float80_type_node);
26957 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
26959 /* The __float128 type. */
26960 float128_type_node = make_node (REAL_TYPE);
26961 TYPE_PRECISION (float128_type_node) = 128;
26962 layout_type (float128_type_node);
26963 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
26965 /* This macro is built by i386-builtin-types.awk. */
26966 DEFINE_BUILTIN_PRIMITIVE_TYPES;
26970 ix86_init_builtins (void)
26974 ix86_init_builtin_types ();
26976 /* TFmode support builtins. */
26977 def_builtin_const (0, "__builtin_infq",
26978 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
26979 def_builtin_const (0, "__builtin_huge_valq",
26980 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
26982 /* We will expand them into normal calls if SSE2 isn't available, since
26983 they are used by libgcc.  */
26984 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
26985 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
26986 BUILT_IN_MD, "__fabstf2", NULL_TREE);
26987 TREE_READONLY (t) = 1;
26988 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
26990 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
26991 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
26992 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
26993 TREE_READONLY (t) = 1;
26994 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
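/* Sketch of the TFmode builtins from user code; with SSE2 they expand
   inline, otherwise they become the libgcc calls named above.  */
#if 0
__float128
same_magnitude_as (__float128 value, __float128 sign_source)
{
  return __builtin_copysignq (__builtin_fabsq (value), sign_source);
}
#endif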
26996 ix86_init_mmx_sse_builtins ();
26999 ix86_init_builtins_va_builtins_abi ();
27001 #ifdef SUBTARGET_INIT_BUILTINS
27002 SUBTARGET_INIT_BUILTINS;
27003 #endif
27004 }
27006 /* Return the ix86 builtin for CODE. */
27009 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
27011 if (code >= IX86_BUILTIN_MAX)
27012 return error_mark_node;
27014 return ix86_builtins[code];
27017 /* Errors in the source file can cause expand_expr to return const0_rtx
27018 where we expect a vector. To avoid crashing, use one of the vector
27019 clear instructions. */
27021 safe_vector_operand (rtx x, enum machine_mode mode)
27023 if (x == const0_rtx)
27024 x = CONST0_RTX (mode);
27025 return x;
27026 }
27028 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
27031 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
27034 tree arg0 = CALL_EXPR_ARG (exp, 0);
27035 tree arg1 = CALL_EXPR_ARG (exp, 1);
27036 rtx op0 = expand_normal (arg0);
27037 rtx op1 = expand_normal (arg1);
27038 enum machine_mode tmode = insn_data[icode].operand[0].mode;
27039 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
27040 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
27042 if (VECTOR_MODE_P (mode0))
27043 op0 = safe_vector_operand (op0, mode0);
27044 if (VECTOR_MODE_P (mode1))
27045 op1 = safe_vector_operand (op1, mode1);
27047 if (optimize || !target
27048 || GET_MODE (target) != tmode
27049 || !insn_data[icode].operand[0].predicate (target, tmode))
27050 target = gen_reg_rtx (tmode);
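/* Some binop patterns want a TImode operand while the builtin prototype
   passes an int: load the SImode value into a V4SI register (zeroing the
   upper lanes) and reinterpret its 128 bits as TImode.  */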
27052 if (GET_MODE (op1) == SImode && mode1 == TImode)
27054 rtx x = gen_reg_rtx (V4SImode);
27055 emit_insn (gen_sse2_loadd (x, op1));
27056 op1 = gen_lowpart (TImode, x);
27059 if (!insn_data[icode].operand[1].predicate (op0, mode0))
27060 op0 = copy_to_mode_reg (mode0, op0);
27061 if (!insn_data[icode].operand[2].predicate (op1, mode1))
27062 op1 = copy_to_mode_reg (mode1, op1);
27064 pat = GEN_FCN (icode) (target, op0, op1);
27065 if (! pat)
27066 return 0;
27067 emit_insn (pat);
27069 return target;
27070 }
27073 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
27076 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
27077 enum ix86_builtin_func_type m_type,
27078 enum rtx_code sub_code)
27079 {
27080 rtx pat;
27081 unsigned int i;
27082 unsigned int nargs;
27083 bool comparison_p = false;
27084 bool tf_p = false;
27085 bool last_arg_constant = false;
27086 int num_memory = 0;
27087 struct {
27088 rtx op;
27089 enum machine_mode mode;
27090 } args[4];
27092 enum machine_mode tmode = insn_data[icode].operand[0].mode;
27096 case MULTI_ARG_4_DF2_DI_I:
27097 case MULTI_ARG_4_DF2_DI_I1:
27098 case MULTI_ARG_4_SF2_SI_I:
27099 case MULTI_ARG_4_SF2_SI_I1:
27100 nargs = 4;
27101 last_arg_constant = true;
27102 break;
27104 case MULTI_ARG_3_SF:
27105 case MULTI_ARG_3_DF:
27106 case MULTI_ARG_3_SF2:
27107 case MULTI_ARG_3_DF2:
27108 case MULTI_ARG_3_DI:
27109 case MULTI_ARG_3_SI:
27110 case MULTI_ARG_3_SI_DI:
27111 case MULTI_ARG_3_HI:
27112 case MULTI_ARG_3_HI_SI:
27113 case MULTI_ARG_3_QI:
27114 case MULTI_ARG_3_DI2:
27115 case MULTI_ARG_3_SI2:
27116 case MULTI_ARG_3_HI2:
27117 case MULTI_ARG_3_QI2:
27118 nargs = 3;
27119 break;
27121 case MULTI_ARG_2_SF:
27122 case MULTI_ARG_2_DF:
27123 case MULTI_ARG_2_DI:
27124 case MULTI_ARG_2_SI:
27125 case MULTI_ARG_2_HI:
27126 case MULTI_ARG_2_QI:
27127 nargs = 2;
27128 break;
27130 case MULTI_ARG_2_DI_IMM:
27131 case MULTI_ARG_2_SI_IMM:
27132 case MULTI_ARG_2_HI_IMM:
27133 case MULTI_ARG_2_QI_IMM:
27134 nargs = 2;
27135 last_arg_constant = true;
27136 break;
27138 case MULTI_ARG_1_SF:
27139 case MULTI_ARG_1_DF:
27140 case MULTI_ARG_1_SF2:
27141 case MULTI_ARG_1_DF2:
27142 case MULTI_ARG_1_DI:
27143 case MULTI_ARG_1_SI:
27144 case MULTI_ARG_1_HI:
27145 case MULTI_ARG_1_QI:
27146 case MULTI_ARG_1_SI_DI:
27147 case MULTI_ARG_1_HI_DI:
27148 case MULTI_ARG_1_HI_SI:
27149 case MULTI_ARG_1_QI_DI:
27150 case MULTI_ARG_1_QI_SI:
27151 case MULTI_ARG_1_QI_HI:
27152 nargs = 1;
27153 break;
27155 case MULTI_ARG_2_DI_CMP:
27156 case MULTI_ARG_2_SI_CMP:
27157 case MULTI_ARG_2_HI_CMP:
27158 case MULTI_ARG_2_QI_CMP:
27159 nargs = 2;
27160 comparison_p = true;
27161 break;
27163 case MULTI_ARG_2_SF_TF:
27164 case MULTI_ARG_2_DF_TF:
27165 case MULTI_ARG_2_DI_TF:
27166 case MULTI_ARG_2_SI_TF:
27167 case MULTI_ARG_2_HI_TF:
27168 case MULTI_ARG_2_QI_TF:
27169 nargs = 2;
27170 tf_p = true;
27171 break;
27173 default:
27174 gcc_unreachable ();
27177 if (optimize || !target
27178 || GET_MODE (target) != tmode
27179 || !insn_data[icode].operand[0].predicate (target, tmode))
27180 target = gen_reg_rtx (tmode);
27182 gcc_assert (nargs <= 4);
27184 for (i = 0; i < nargs; i++)
27186 tree arg = CALL_EXPR_ARG (exp, i);
27187 rtx op = expand_normal (arg);
27188 int adjust = (comparison_p) ? 1 : 0;
27189 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
27191 if (last_arg_constant && i == nargs - 1)
27193 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
27195 enum insn_code new_icode = icode;
27196 switch (icode)
27197 {
27198 case CODE_FOR_xop_vpermil2v2df3:
27199 case CODE_FOR_xop_vpermil2v4sf3:
27200 case CODE_FOR_xop_vpermil2v4df3:
27201 case CODE_FOR_xop_vpermil2v8sf3:
27202 error ("the last argument must be a 2-bit immediate");
27203 return gen_reg_rtx (tmode);
27204 case CODE_FOR_xop_rotlv2di3:
27205 new_icode = CODE_FOR_rotlv2di3;
27206 break;
27207 case CODE_FOR_xop_rotlv4si3:
27208 new_icode = CODE_FOR_rotlv4si3;
27209 break;
27210 case CODE_FOR_xop_rotlv8hi3:
27211 new_icode = CODE_FOR_rotlv8hi3;
27212 break;
27213 case CODE_FOR_xop_rotlv16qi3:
27214 new_icode = CODE_FOR_rotlv16qi3;
27216 if (CONST_INT_P (op))
27217 {
27218 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
27219 op = GEN_INT (INTVAL (op) & mask);
27220 gcc_checking_assert
27221 (insn_data[icode].operand[i + 1].predicate (op, mode));
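/* E.g. for rotlv2di3 the inner mode is DImode, so the mask is 63 and a
   constant rotate count of 70 is canonicalized to 70 & 63 == 6.  */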
27222 }
27223 else
27224 {
27225 gcc_checking_assert
27226 (nargs == 2
27227 && insn_data[new_icode].operand[0].mode == tmode
27228 && insn_data[new_icode].operand[1].mode == tmode
27229 && insn_data[new_icode].operand[2].mode == mode
27230 && insn_data[new_icode].operand[0].predicate
27231 == insn_data[icode].operand[0].predicate
27232 && insn_data[new_icode].operand[1].predicate
27233 == insn_data[icode].operand[1].predicate);
27234 icode = new_icode;
27235 }
27236 break;
27238 default:
27239 gcc_unreachable ();
27246 if (VECTOR_MODE_P (mode))
27247 op = safe_vector_operand (op, mode);
27249 /* If we aren't optimizing, only allow one memory operand to be
27250 generated.  */
27251 if (memory_operand (op, mode))
27254 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
27256 if (optimize
27257 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
27258 || num_memory > 1)
27259 op = force_reg (mode, op);
27262 args[i].op = op;
27263 args[i].mode = mode;
27264 }
27266 switch (nargs)
27267 {
27268 case 1:
27269 pat = GEN_FCN (icode) (target, args[0].op);
27270 break;
27272 case 2:
27273 if (tf_p)
27274 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
27275 GEN_INT ((int)sub_code));
27276 else if (! comparison_p)
27277 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
27278 else
27279 {
27280 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
27281 args[0].op,
27282 args[1].op);
27284 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
27285 }
27286 break;
27288 case 3:
27289 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
27290 break;
27292 case 4:
27293 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
27294 break;
27296 default:
27297 gcc_unreachable ();
27298 }
27300 if (! pat)
27301 return 0;
27302 emit_insn (pat);
27303 return target;
27304 }
27307 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
27308 insns with vec_merge. */
27311 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
27312 rtx target)
27313 {
27314 rtx pat;
27315 tree arg0 = CALL_EXPR_ARG (exp, 0);
27316 rtx op1, op0 = expand_normal (arg0);
27317 enum machine_mode tmode = insn_data[icode].operand[0].mode;
27318 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
27320 if (optimize || !target
27321 || GET_MODE (target) != tmode
27322 || !insn_data[icode].operand[0].predicate (target, tmode))
27323 target = gen_reg_rtx (tmode);
27325 if (VECTOR_MODE_P (mode0))
27326 op0 = safe_vector_operand (op0, mode0);
27328 if ((optimize && !register_operand (op0, mode0))
27329 || !insn_data[icode].operand[1].predicate (op0, mode0))
27330 op0 = copy_to_mode_reg (mode0, op0);
27332 op1 = op0;
27333 if (!insn_data[icode].operand[2].predicate (op1, mode0))
27334 op1 = copy_to_mode_reg (mode0, op1);
27336 pat = GEN_FCN (icode) (target, op0, op1);
27337 if (! pat)
27338 return 0;
27339 emit_insn (pat);
27340 return target;
27341 }
27343 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
27346 ix86_expand_sse_compare (const struct builtin_description *d,
27347 tree exp, rtx target, bool swap)
27350 tree arg0 = CALL_EXPR_ARG (exp, 0);
27351 tree arg1 = CALL_EXPR_ARG (exp, 1);
27352 rtx op0 = expand_normal (arg0);
27353 rtx op1 = expand_normal (arg1);
27355 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
27356 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
27357 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
27358 enum rtx_code comparison = d->comparison;
27360 if (VECTOR_MODE_P (mode0))
27361 op0 = safe_vector_operand (op0, mode0);
27362 if (VECTOR_MODE_P (mode1))
27363 op1 = safe_vector_operand (op1, mode1);
27365 /* Swap operands if we have a comparison that isn't available in
27366 SSE.  */
27367 if (swap)
27368 {
27369 rtx tmp = gen_reg_rtx (mode1);
27370 emit_move_insn (tmp, op1);
27371 op1 = op0;
27372 op0 = tmp;
27373 }
27375 if (optimize || !target
27376 || GET_MODE (target) != tmode
27377 || !insn_data[d->icode].operand[0].predicate (target, tmode))
27378 target = gen_reg_rtx (tmode);
27380 if ((optimize && !register_operand (op0, mode0))
27381 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
27382 op0 = copy_to_mode_reg (mode0, op0);
27383 if ((optimize && !register_operand (op1, mode1))
27384 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
27385 op1 = copy_to_mode_reg (mode1, op1);
27387 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
27388 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
27389 if (! pat)
27390 return 0;
27391 emit_insn (pat);
27392 return target;
27393 }
27395 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
27398 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
27399 rtx target)
27400 {
27401 rtx pat;
27402 tree arg0 = CALL_EXPR_ARG (exp, 0);
27403 tree arg1 = CALL_EXPR_ARG (exp, 1);
27404 rtx op0 = expand_normal (arg0);
27405 rtx op1 = expand_normal (arg1);
27406 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
27407 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
27408 enum rtx_code comparison = d->comparison;
27410 if (VECTOR_MODE_P (mode0))
27411 op0 = safe_vector_operand (op0, mode0);
27412 if (VECTOR_MODE_P (mode1))
27413 op1 = safe_vector_operand (op1, mode1);
27415 /* Swap operands if we have a comparison that isn't available in
27416 SSE.  */
27417 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
27418 {
27419 rtx tmp = op1;
27420 op1 = op0;
27421 op0 = tmp;
27422 }
27424 target = gen_reg_rtx (SImode);
27425 emit_move_insn (target, const0_rtx);
27426 target = gen_rtx_SUBREG (QImode, target, 0);
27428 if ((optimize && !register_operand (op0, mode0))
27429 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
27430 op0 = copy_to_mode_reg (mode0, op0);
27431 if ((optimize && !register_operand (op1, mode1))
27432 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
27433 op1 = copy_to_mode_reg (mode1, op1);
27435 pat = GEN_FCN (d->icode) (op0, op1);
27436 if (! pat)
27437 return 0;
27438 emit_insn (pat);
27439 emit_insn (gen_rtx_SET (VOIDmode,
27440 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27441 gen_rtx_fmt_ee (comparison, QImode,
27442 SET_DEST (pat),
27443 const0_rtx)));
27445 return SUBREG_REG (target);
27448 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
27451 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
27452 rtx target)
27453 {
27454 rtx pat;
27455 tree arg0 = CALL_EXPR_ARG (exp, 0);
27456 rtx op1, op0 = expand_normal (arg0);
27457 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
27458 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
27460 if (optimize || target == 0
27461 || GET_MODE (target) != tmode
27462 || !insn_data[d->icode].operand[0].predicate (target, tmode))
27463 target = gen_reg_rtx (tmode);
27465 if (VECTOR_MODE_P (mode0))
27466 op0 = safe_vector_operand (op0, mode0);
27468 if ((optimize && !register_operand (op0, mode0))
27469 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
27470 op0 = copy_to_mode_reg (mode0, op0);
27472 op1 = GEN_INT (d->comparison);
27474 pat = GEN_FCN (d->icode) (target, op0, op1);
27475 if (! pat)
27476 return 0;
27477 emit_insn (pat);
27478 return target;
27479 }
27481 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
27484 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
27485 rtx target)
27486 {
27487 rtx pat;
27488 tree arg0 = CALL_EXPR_ARG (exp, 0);
27489 tree arg1 = CALL_EXPR_ARG (exp, 1);
27490 rtx op0 = expand_normal (arg0);
27491 rtx op1 = expand_normal (arg1);
27492 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
27493 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
27494 enum rtx_code comparison = d->comparison;
27496 if (VECTOR_MODE_P (mode0))
27497 op0 = safe_vector_operand (op0, mode0);
27498 if (VECTOR_MODE_P (mode1))
27499 op1 = safe_vector_operand (op1, mode1);
27501 target = gen_reg_rtx (SImode);
27502 emit_move_insn (target, const0_rtx);
27503 target = gen_rtx_SUBREG (QImode, target, 0);
27505 if ((optimize && !register_operand (op0, mode0))
27506 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
27507 op0 = copy_to_mode_reg (mode0, op0);
27508 if ((optimize && !register_operand (op1, mode1))
27509 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
27510 op1 = copy_to_mode_reg (mode1, op1);
27512 pat = GEN_FCN (d->icode) (op0, op1);
27513 if (! pat)
27514 return 0;
27515 emit_insn (pat);
27516 emit_insn (gen_rtx_SET (VOIDmode,
27517 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27518 gen_rtx_fmt_ee (comparison, QImode,
27519 SET_DEST (pat),
27520 const0_rtx)));
27522 return SUBREG_REG (target);
27525 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
27528 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
27529 tree exp, rtx target)
27530 {
27531 rtx pat;
27532 tree arg0 = CALL_EXPR_ARG (exp, 0);
27533 tree arg1 = CALL_EXPR_ARG (exp, 1);
27534 tree arg2 = CALL_EXPR_ARG (exp, 2);
27535 tree arg3 = CALL_EXPR_ARG (exp, 3);
27536 tree arg4 = CALL_EXPR_ARG (exp, 4);
27537 rtx scratch0, scratch1;
27538 rtx op0 = expand_normal (arg0);
27539 rtx op1 = expand_normal (arg1);
27540 rtx op2 = expand_normal (arg2);
27541 rtx op3 = expand_normal (arg3);
27542 rtx op4 = expand_normal (arg4);
27543 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
27545 tmode0 = insn_data[d->icode].operand[0].mode;
27546 tmode1 = insn_data[d->icode].operand[1].mode;
27547 modev2 = insn_data[d->icode].operand[2].mode;
27548 modei3 = insn_data[d->icode].operand[3].mode;
27549 modev4 = insn_data[d->icode].operand[4].mode;
27550 modei5 = insn_data[d->icode].operand[5].mode;
27551 modeimm = insn_data[d->icode].operand[6].mode;
27553 if (VECTOR_MODE_P (modev2))
27554 op0 = safe_vector_operand (op0, modev2);
27555 if (VECTOR_MODE_P (modev4))
27556 op2 = safe_vector_operand (op2, modev4);
27558 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
27559 op0 = copy_to_mode_reg (modev2, op0);
27560 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
27561 op1 = copy_to_mode_reg (modei3, op1);
27562 if ((optimize && !register_operand (op2, modev4))
27563 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
27564 op2 = copy_to_mode_reg (modev4, op2);
27565 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
27566 op3 = copy_to_mode_reg (modei5, op3);
27568 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
27570 error ("the fifth argument must be an 8-bit immediate");
27574 if (d->code == IX86_BUILTIN_PCMPESTRI128)
27576 if (optimize || !target
27577 || GET_MODE (target) != tmode0
27578 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
27579 target = gen_reg_rtx (tmode0);
27581 scratch1 = gen_reg_rtx (tmode1);
27583 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
27585 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
27587 if (optimize || !target
27588 || GET_MODE (target) != tmode1
27589 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
27590 target = gen_reg_rtx (tmode1);
27592 scratch0 = gen_reg_rtx (tmode0);
27594 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
27598 gcc_assert (d->flag);
27600 scratch0 = gen_reg_rtx (tmode0);
27601 scratch1 = gen_reg_rtx (tmode1);
27603 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
27613 target = gen_reg_rtx (SImode);
27614 emit_move_insn (target, const0_rtx);
27615 target = gen_rtx_SUBREG (QImode, target, 0);
27617 emit_insn
27618 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27619 gen_rtx_fmt_ee (EQ, QImode,
27620 gen_rtx_REG ((enum machine_mode) d->flag,
27621 FLAGS_REG),
27622 const0_rtx)));
27623 return SUBREG_REG (target);
27630 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
27633 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
27634 tree exp, rtx target)
27635 {
27636 rtx pat;
27637 tree arg0 = CALL_EXPR_ARG (exp, 0);
27638 tree arg1 = CALL_EXPR_ARG (exp, 1);
27639 tree arg2 = CALL_EXPR_ARG (exp, 2);
27640 rtx scratch0, scratch1;
27641 rtx op0 = expand_normal (arg0);
27642 rtx op1 = expand_normal (arg1);
27643 rtx op2 = expand_normal (arg2);
27644 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
27646 tmode0 = insn_data[d->icode].operand[0].mode;
27647 tmode1 = insn_data[d->icode].operand[1].mode;
27648 modev2 = insn_data[d->icode].operand[2].mode;
27649 modev3 = insn_data[d->icode].operand[3].mode;
27650 modeimm = insn_data[d->icode].operand[4].mode;
27652 if (VECTOR_MODE_P (modev2))
27653 op0 = safe_vector_operand (op0, modev2);
27654 if (VECTOR_MODE_P (modev3))
27655 op1 = safe_vector_operand (op1, modev3);
27657 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
27658 op0 = copy_to_mode_reg (modev2, op0);
27659 if ((optimize && !register_operand (op1, modev3))
27660 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
27661 op1 = copy_to_mode_reg (modev3, op1);
27663 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
27665 error ("the third argument must be an 8-bit immediate");
27669 if (d->code == IX86_BUILTIN_PCMPISTRI128)
27671 if (optimize || !target
27672 || GET_MODE (target) != tmode0
27673 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
27674 target = gen_reg_rtx (tmode0);
27676 scratch1 = gen_reg_rtx (tmode1);
27678 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
27680 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
27682 if (optimize || !target
27683 || GET_MODE (target) != tmode1
27684 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
27685 target = gen_reg_rtx (tmode1);
27687 scratch0 = gen_reg_rtx (tmode0);
27689 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
27693 gcc_assert (d->flag);
27695 scratch0 = gen_reg_rtx (tmode0);
27696 scratch1 = gen_reg_rtx (tmode1);
27698 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
27708 target = gen_reg_rtx (SImode);
27709 emit_move_insn (target, const0_rtx);
27710 target = gen_rtx_SUBREG (QImode, target, 0);
27712 emit_insn
27713 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27714 gen_rtx_fmt_ee (EQ, QImode,
27715 gen_rtx_REG ((enum machine_mode) d->flag,
27716 FLAGS_REG),
27717 const0_rtx)));
27718 return SUBREG_REG (target);
27724 /* Subroutine of ix86_expand_builtin to take care of insns with
27725 variable number of operands. */
27728 ix86_expand_args_builtin (const struct builtin_description *d,
27729 tree exp, rtx target)
27731 rtx pat, real_target;
27732 unsigned int i, nargs;
27733 unsigned int nargs_constant = 0;
27734 int num_memory = 0;
27735 struct
27736 {
27737 rtx op;
27738 enum machine_mode mode;
27739 } args[4];
27740 bool last_arg_count = false;
27741 enum insn_code icode = d->icode;
27742 const struct insn_data_d *insn_p = &insn_data[icode];
27743 enum machine_mode tmode = insn_p->operand[0].mode;
27744 enum machine_mode rmode = VOIDmode;
27745 bool swap = false;
27746 enum rtx_code comparison = d->comparison;
27748 switch ((enum ix86_builtin_func_type) d->flag)
27750 case V2DF_FTYPE_V2DF_ROUND:
27751 case V4DF_FTYPE_V4DF_ROUND:
27752 case V4SF_FTYPE_V4SF_ROUND:
27753 case V8SF_FTYPE_V8SF_ROUND:
27754 return ix86_expand_sse_round (d, exp, target);
27755 case INT_FTYPE_V8SF_V8SF_PTEST:
27756 case INT_FTYPE_V4DI_V4DI_PTEST:
27757 case INT_FTYPE_V4DF_V4DF_PTEST:
27758 case INT_FTYPE_V4SF_V4SF_PTEST:
27759 case INT_FTYPE_V2DI_V2DI_PTEST:
27760 case INT_FTYPE_V2DF_V2DF_PTEST:
27761 return ix86_expand_sse_ptest (d, exp, target);
27762 case FLOAT128_FTYPE_FLOAT128:
27763 case FLOAT_FTYPE_FLOAT:
27764 case INT_FTYPE_INT:
27765 case UINT64_FTYPE_INT:
27766 case UINT16_FTYPE_UINT16:
27767 case INT64_FTYPE_INT64:
27768 case INT64_FTYPE_V4SF:
27769 case INT64_FTYPE_V2DF:
27770 case INT_FTYPE_V16QI:
27771 case INT_FTYPE_V8QI:
27772 case INT_FTYPE_V8SF:
27773 case INT_FTYPE_V4DF:
27774 case INT_FTYPE_V4SF:
27775 case INT_FTYPE_V2DF:
27776 case INT_FTYPE_V32QI:
27777 case V16QI_FTYPE_V16QI:
27778 case V8SI_FTYPE_V8SF:
27779 case V8SI_FTYPE_V4SI:
27780 case V8HI_FTYPE_V8HI:
27781 case V8HI_FTYPE_V16QI:
27782 case V8QI_FTYPE_V8QI:
27783 case V8SF_FTYPE_V8SF:
27784 case V8SF_FTYPE_V8SI:
27785 case V8SF_FTYPE_V4SF:
27786 case V8SF_FTYPE_V8HI:
27787 case V4SI_FTYPE_V4SI:
27788 case V4SI_FTYPE_V16QI:
27789 case V4SI_FTYPE_V4SF:
27790 case V4SI_FTYPE_V8SI:
27791 case V4SI_FTYPE_V8HI:
27792 case V4SI_FTYPE_V4DF:
27793 case V4SI_FTYPE_V2DF:
27794 case V4HI_FTYPE_V4HI:
27795 case V4DF_FTYPE_V4DF:
27796 case V4DF_FTYPE_V4SI:
27797 case V4DF_FTYPE_V4SF:
27798 case V4DF_FTYPE_V2DF:
27799 case V4SF_FTYPE_V4SF:
27800 case V4SF_FTYPE_V4SI:
27801 case V4SF_FTYPE_V8SF:
27802 case V4SF_FTYPE_V4DF:
27803 case V4SF_FTYPE_V8HI:
27804 case V4SF_FTYPE_V2DF:
27805 case V2DI_FTYPE_V2DI:
27806 case V2DI_FTYPE_V16QI:
27807 case V2DI_FTYPE_V8HI:
27808 case V2DI_FTYPE_V4SI:
27809 case V2DF_FTYPE_V2DF:
27810 case V2DF_FTYPE_V4SI:
27811 case V2DF_FTYPE_V4DF:
27812 case V2DF_FTYPE_V4SF:
27813 case V2DF_FTYPE_V2SI:
27814 case V2SI_FTYPE_V2SI:
27815 case V2SI_FTYPE_V4SF:
27816 case V2SI_FTYPE_V2SF:
27817 case V2SI_FTYPE_V2DF:
27818 case V2SF_FTYPE_V2SF:
27819 case V2SF_FTYPE_V2SI:
27820 case V32QI_FTYPE_V32QI:
27821 case V32QI_FTYPE_V16QI:
27822 case V16HI_FTYPE_V16HI:
27823 case V16HI_FTYPE_V8HI:
27824 case V8SI_FTYPE_V8SI:
27825 case V16HI_FTYPE_V16QI:
27826 case V8SI_FTYPE_V16QI:
27827 case V4DI_FTYPE_V16QI:
27828 case V8SI_FTYPE_V8HI:
27829 case V4DI_FTYPE_V8HI:
27830 case V4DI_FTYPE_V4SI:
27831 case V4DI_FTYPE_V2DI:
27832 nargs = 1;
27833 break;
27834 case V4SF_FTYPE_V4SF_VEC_MERGE:
27835 case V2DF_FTYPE_V2DF_VEC_MERGE:
27836 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
27837 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
27838 case V16QI_FTYPE_V16QI_V16QI:
27839 case V16QI_FTYPE_V8HI_V8HI:
27840 case V8QI_FTYPE_V8QI_V8QI:
27841 case V8QI_FTYPE_V4HI_V4HI:
27842 case V8HI_FTYPE_V8HI_V8HI:
27843 case V8HI_FTYPE_V16QI_V16QI:
27844 case V8HI_FTYPE_V4SI_V4SI:
27845 case V8SF_FTYPE_V8SF_V8SF:
27846 case V8SF_FTYPE_V8SF_V8SI:
27847 case V4SI_FTYPE_V4SI_V4SI:
27848 case V4SI_FTYPE_V8HI_V8HI:
27849 case V4SI_FTYPE_V4SF_V4SF:
27850 case V4SI_FTYPE_V2DF_V2DF:
27851 case V4HI_FTYPE_V4HI_V4HI:
27852 case V4HI_FTYPE_V8QI_V8QI:
27853 case V4HI_FTYPE_V2SI_V2SI:
27854 case V4DF_FTYPE_V4DF_V4DF:
27855 case V4DF_FTYPE_V4DF_V4DI:
27856 case V4SF_FTYPE_V4SF_V4SF:
27857 case V4SF_FTYPE_V4SF_V4SI:
27858 case V4SF_FTYPE_V4SF_V2SI:
27859 case V4SF_FTYPE_V4SF_V2DF:
27860 case V4SF_FTYPE_V4SF_DI:
27861 case V4SF_FTYPE_V4SF_SI:
27862 case V2DI_FTYPE_V2DI_V2DI:
27863 case V2DI_FTYPE_V16QI_V16QI:
27864 case V2DI_FTYPE_V4SI_V4SI:
27865 case V2DI_FTYPE_V2DI_V16QI:
27866 case V2DI_FTYPE_V2DF_V2DF:
27867 case V2SI_FTYPE_V2SI_V2SI:
27868 case V2SI_FTYPE_V4HI_V4HI:
27869 case V2SI_FTYPE_V2SF_V2SF:
27870 case V2DF_FTYPE_V2DF_V2DF:
27871 case V2DF_FTYPE_V2DF_V4SF:
27872 case V2DF_FTYPE_V2DF_V2DI:
27873 case V2DF_FTYPE_V2DF_DI:
27874 case V2DF_FTYPE_V2DF_SI:
27875 case V2SF_FTYPE_V2SF_V2SF:
27876 case V1DI_FTYPE_V1DI_V1DI:
27877 case V1DI_FTYPE_V8QI_V8QI:
27878 case V1DI_FTYPE_V2SI_V2SI:
27879 case V32QI_FTYPE_V16HI_V16HI:
27880 case V16HI_FTYPE_V8SI_V8SI:
27881 case V32QI_FTYPE_V32QI_V32QI:
27882 case V16HI_FTYPE_V32QI_V32QI:
27883 case V16HI_FTYPE_V16HI_V16HI:
27884 case V8SI_FTYPE_V8SI_V8SI:
27885 case V8SI_FTYPE_V16HI_V16HI:
27886 case V4DI_FTYPE_V4DI_V4DI:
27887 case V4DI_FTYPE_V8SI_V8SI:
27888 if (comparison == UNKNOWN)
27889 return ix86_expand_binop_builtin (icode, exp, target);
27890 nargs = 2;
27891 break;
27892 case V4SF_FTYPE_V4SF_V4SF_SWAP:
27893 case V2DF_FTYPE_V2DF_V2DF_SWAP:
27894 gcc_assert (comparison != UNKNOWN);
27895 nargs = 2;
27896 swap = true;
27897 break;
27898 case V16HI_FTYPE_V16HI_V8HI_COUNT:
27899 case V16HI_FTYPE_V16HI_SI_COUNT:
27900 case V8SI_FTYPE_V8SI_V4SI_COUNT:
27901 case V8SI_FTYPE_V8SI_SI_COUNT:
27902 case V4DI_FTYPE_V4DI_V2DI_COUNT:
27903 case V4DI_FTYPE_V4DI_INT_COUNT:
27904 case V8HI_FTYPE_V8HI_V8HI_COUNT:
27905 case V8HI_FTYPE_V8HI_SI_COUNT:
27906 case V4SI_FTYPE_V4SI_V4SI_COUNT:
27907 case V4SI_FTYPE_V4SI_SI_COUNT:
27908 case V4HI_FTYPE_V4HI_V4HI_COUNT:
27909 case V4HI_FTYPE_V4HI_SI_COUNT:
27910 case V2DI_FTYPE_V2DI_V2DI_COUNT:
27911 case V2DI_FTYPE_V2DI_SI_COUNT:
27912 case V2SI_FTYPE_V2SI_V2SI_COUNT:
27913 case V2SI_FTYPE_V2SI_SI_COUNT:
27914 case V1DI_FTYPE_V1DI_V1DI_COUNT:
27915 case V1DI_FTYPE_V1DI_SI_COUNT:
27916 nargs = 2;
27917 last_arg_count = true;
27918 break;
27919 case UINT64_FTYPE_UINT64_UINT64:
27920 case UINT_FTYPE_UINT_UINT:
27921 case UINT_FTYPE_UINT_USHORT:
27922 case UINT_FTYPE_UINT_UCHAR:
27923 case UINT16_FTYPE_UINT16_INT:
27924 case UINT8_FTYPE_UINT8_INT:
27925 nargs = 2;
27926 break;
27927 case V2DI_FTYPE_V2DI_INT_CONVERT:
27928 nargs = 2;
27929 rmode = V1TImode;
27930 nargs_constant = 1;
27931 break;
27932 case V4DI_FTYPE_V4DI_INT_CONVERT:
27933 nargs = 2;
27934 rmode = V2TImode;
27935 nargs_constant = 1;
27936 break;
27937 case V8HI_FTYPE_V8HI_INT:
27938 case V8HI_FTYPE_V8SF_INT:
27939 case V8HI_FTYPE_V4SF_INT:
27940 case V8SF_FTYPE_V8SF_INT:
27941 case V4SI_FTYPE_V4SI_INT:
27942 case V4SI_FTYPE_V8SI_INT:
27943 case V4HI_FTYPE_V4HI_INT:
27944 case V4DF_FTYPE_V4DF_INT:
27945 case V4SF_FTYPE_V4SF_INT:
27946 case V4SF_FTYPE_V8SF_INT:
27947 case V2DI_FTYPE_V2DI_INT:
27948 case V2DF_FTYPE_V2DF_INT:
27949 case V2DF_FTYPE_V4DF_INT:
27950 case V16HI_FTYPE_V16HI_INT:
27951 case V8SI_FTYPE_V8SI_INT:
27952 case V4DI_FTYPE_V4DI_INT:
27953 case V2DI_FTYPE_V4DI_INT:
27954 nargs = 2;
27955 nargs_constant = 1;
27956 break;
27957 case V16QI_FTYPE_V16QI_V16QI_V16QI:
27958 case V8SF_FTYPE_V8SF_V8SF_V8SF:
27959 case V4DF_FTYPE_V4DF_V4DF_V4DF:
27960 case V4SF_FTYPE_V4SF_V4SF_V4SF:
27961 case V2DF_FTYPE_V2DF_V2DF_V2DF:
27962 case V32QI_FTYPE_V32QI_V32QI_V32QI:
27963 nargs = 3;
27964 break;
27965 case V32QI_FTYPE_V32QI_V32QI_INT:
27966 case V16HI_FTYPE_V16HI_V16HI_INT:
27967 case V16QI_FTYPE_V16QI_V16QI_INT:
27968 case V4DI_FTYPE_V4DI_V4DI_INT:
27969 case V8HI_FTYPE_V8HI_V8HI_INT:
27970 case V8SI_FTYPE_V8SI_V8SI_INT:
27971 case V8SI_FTYPE_V8SI_V4SI_INT:
27972 case V8SF_FTYPE_V8SF_V8SF_INT:
27973 case V8SF_FTYPE_V8SF_V4SF_INT:
27974 case V4SI_FTYPE_V4SI_V4SI_INT:
27975 case V4DF_FTYPE_V4DF_V4DF_INT:
27976 case V4DF_FTYPE_V4DF_V2DF_INT:
27977 case V4SF_FTYPE_V4SF_V4SF_INT:
27978 case V2DI_FTYPE_V2DI_V2DI_INT:
27979 case V4DI_FTYPE_V4DI_V2DI_INT:
27980 case V2DF_FTYPE_V2DF_V2DF_INT:
27981 nargs = 3;
27982 nargs_constant = 1;
27983 break;
27984 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
27985 nargs = 3;
27986 rmode = V2TImode;
27987 nargs_constant = 1;
27988 break;
27989 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
27990 nargs = 3;
27991 rmode = V1TImode;
27992 nargs_constant = 1;
27993 break;
27994 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
27997 nargs_constant = 1;
27999 case V2DI_FTYPE_V2DI_UINT_UINT:
28000 nargs = 3;
28001 nargs_constant = 2;
28002 break;
28003 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
28004 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
28005 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
28006 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
28007 nargs = 4;
28008 nargs_constant = 1;
28009 break;
28010 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
28011 nargs = 4;
28012 nargs_constant = 2;
28013 break;
28014 default:
28015 gcc_unreachable ();
28018 gcc_assert (nargs <= ARRAY_SIZE (args));
28020 if (comparison != UNKNOWN)
28022 gcc_assert (nargs == 2);
28023 return ix86_expand_sse_compare (d, exp, target, swap);
28026 if (rmode == VOIDmode || rmode == tmode)
28027 {
28028 if (optimize
28029 || target == 0
28030 || GET_MODE (target) != tmode
28031 || !insn_p->operand[0].predicate (target, tmode))
28032 target = gen_reg_rtx (tmode);
28033 real_target = target;
28037 target = gen_reg_rtx (rmode);
28038 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
28041 for (i = 0; i < nargs; i++)
28043 tree arg = CALL_EXPR_ARG (exp, i);
28044 rtx op = expand_normal (arg);
28045 enum machine_mode mode = insn_p->operand[i + 1].mode;
28046 bool match = insn_p->operand[i + 1].predicate (op, mode);
28048 if (last_arg_count && (i + 1) == nargs)
28050 /* SIMD shift insns take either an 8-bit immediate or a register
28051 as the count.  But the builtin functions take int as the count.
28052 If the count doesn't match, we put it in a register.  */
28055 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
28056 if (!insn_p->operand[i + 1].predicate (op, mode))
28057 op = copy_to_reg (op);
28060 else if ((nargs - i) <= nargs_constant)
28065 case CODE_FOR_avx2_inserti128:
28066 case CODE_FOR_avx2_extracti128:
28067 error ("the last argument must be a 1-bit immediate");
28070 case CODE_FOR_sse4_1_roundpd:
28071 case CODE_FOR_sse4_1_roundps:
28072 case CODE_FOR_sse4_1_roundsd:
28073 case CODE_FOR_sse4_1_roundss:
28074 case CODE_FOR_sse4_1_blendps:
28075 case CODE_FOR_avx_blendpd256:
28076 case CODE_FOR_avx_vpermilv4df:
28077 case CODE_FOR_avx_roundpd256:
28078 case CODE_FOR_avx_roundps256:
28079 error ("the last argument must be a 4-bit immediate");
28082 case CODE_FOR_sse4_1_blendpd:
28083 case CODE_FOR_avx_vpermilv2df:
28084 case CODE_FOR_xop_vpermil2v2df3:
28085 case CODE_FOR_xop_vpermil2v4sf3:
28086 case CODE_FOR_xop_vpermil2v4df3:
28087 case CODE_FOR_xop_vpermil2v8sf3:
28088 error ("the last argument must be a 2-bit immediate");
28091 case CODE_FOR_avx_vextractf128v4df:
28092 case CODE_FOR_avx_vextractf128v8sf:
28093 case CODE_FOR_avx_vextractf128v8si:
28094 case CODE_FOR_avx_vinsertf128v4df:
28095 case CODE_FOR_avx_vinsertf128v8sf:
28096 case CODE_FOR_avx_vinsertf128v8si:
28097 error ("the last argument must be a 1-bit immediate");
28100 case CODE_FOR_avx_vmcmpv2df3:
28101 case CODE_FOR_avx_vmcmpv4sf3:
28102 case CODE_FOR_avx_cmpv2df3:
28103 case CODE_FOR_avx_cmpv4sf3:
28104 case CODE_FOR_avx_cmpv4df3:
28105 case CODE_FOR_avx_cmpv8sf3:
28106 error ("the last argument must be a 5-bit immediate");
28110 switch (nargs_constant)
28113 if ((nargs - i) == nargs_constant)
28115 error ("the next to last argument must be an 8-bit immediate");
28119 error ("the last argument must be an 8-bit immediate");
28122 gcc_unreachable ();
28129 if (VECTOR_MODE_P (mode))
28130 op = safe_vector_operand (op, mode);
28132 /* If we aren't optimizing, only allow one memory operand to
28133 be generated.  */
28134 if (memory_operand (op, mode))
28137 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
28139 if (optimize || !match || num_memory > 1)
28140 op = copy_to_mode_reg (mode, op);
28144 op = copy_to_reg (op);
28145 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
28149 args[i].op = op;
28150 args[i].mode = mode;
28151 }
28153 switch (nargs)
28154 {
28155 case 1:
28156 pat = GEN_FCN (icode) (real_target, args[0].op);
28157 break;
28158 case 2:
28159 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
28160 break;
28161 case 3:
28162 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
28163 args[2].op);
28164 break;
28165 case 4:
28166 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
28167 args[2].op, args[3].op);
28168 break;
28169 default:
28170 gcc_unreachable ();
28171 }
28173 if (! pat)
28174 return 0;
28175 emit_insn (pat);
28176 return target;
28177 }
28180 /* Subroutine of ix86_expand_builtin to take care of special insns
28181 with variable number of operands. */
28184 ix86_expand_special_args_builtin (const struct builtin_description *d,
28185 tree exp, rtx target)
28189 unsigned int i, nargs, arg_adjust, memory;
28190 struct
28191 {
28192 rtx op;
28193 enum machine_mode mode;
28194 } args[3];
28195 enum insn_code icode = d->icode;
28196 bool last_arg_constant = false;
28197 const struct insn_data_d *insn_p = &insn_data[icode];
28198 enum machine_mode tmode = insn_p->operand[0].mode;
28199 enum { load, store } klass;
28201 switch ((enum ix86_builtin_func_type) d->flag)
28203 case VOID_FTYPE_VOID:
28204 if (icode == CODE_FOR_avx_vzeroupper)
28205 target = GEN_INT (vzeroupper_intrinsic);
28206 emit_insn (GEN_FCN (icode) (target));
28207 return 0;
28208 case VOID_FTYPE_UINT64:
28209 case VOID_FTYPE_UNSIGNED:
28214 case UINT64_FTYPE_VOID:
28215 case UNSIGNED_FTYPE_VOID:
28220 case UINT64_FTYPE_PUNSIGNED:
28221 case V2DI_FTYPE_PV2DI:
28222 case V4DI_FTYPE_PV4DI:
28223 case V32QI_FTYPE_PCCHAR:
28224 case V16QI_FTYPE_PCCHAR:
28225 case V8SF_FTYPE_PCV4SF:
28226 case V8SF_FTYPE_PCFLOAT:
28227 case V4SF_FTYPE_PCFLOAT:
28228 case V4DF_FTYPE_PCV2DF:
28229 case V4DF_FTYPE_PCDOUBLE:
28230 case V2DF_FTYPE_PCDOUBLE:
28231 case VOID_FTYPE_PVOID:
28236 case VOID_FTYPE_PV2SF_V4SF:
28237 case VOID_FTYPE_PV4DI_V4DI:
28238 case VOID_FTYPE_PV2DI_V2DI:
28239 case VOID_FTYPE_PCHAR_V32QI:
28240 case VOID_FTYPE_PCHAR_V16QI:
28241 case VOID_FTYPE_PFLOAT_V8SF:
28242 case VOID_FTYPE_PFLOAT_V4SF:
28243 case VOID_FTYPE_PDOUBLE_V4DF:
28244 case VOID_FTYPE_PDOUBLE_V2DF:
28245 case VOID_FTYPE_PULONGLONG_ULONGLONG:
28246 case VOID_FTYPE_PINT_INT:
28249 /* Reserve memory operand for target. */
28250 memory = ARRAY_SIZE (args);
28252 case V4SF_FTYPE_V4SF_PCV2SF:
28253 case V2DF_FTYPE_V2DF_PCDOUBLE:
28258 case V8SF_FTYPE_PCV8SF_V8SI:
28259 case V4DF_FTYPE_PCV4DF_V4DI:
28260 case V4SF_FTYPE_PCV4SF_V4SI:
28261 case V2DF_FTYPE_PCV2DF_V2DI:
28262 case V8SI_FTYPE_PCV8SI_V8SI:
28263 case V4DI_FTYPE_PCV4DI_V4DI:
28264 case V4SI_FTYPE_PCV4SI_V4SI:
28265 case V2DI_FTYPE_PCV2DI_V2DI:
28270 case VOID_FTYPE_PV8SF_V8SI_V8SF:
28271 case VOID_FTYPE_PV4DF_V4DI_V4DF:
28272 case VOID_FTYPE_PV4SF_V4SI_V4SF:
28273 case VOID_FTYPE_PV2DF_V2DI_V2DF:
28274 case VOID_FTYPE_PV8SI_V8SI_V8SI:
28275 case VOID_FTYPE_PV4DI_V4DI_V4DI:
28276 case VOID_FTYPE_PV4SI_V4SI_V4SI:
28277 case VOID_FTYPE_PV2DI_V2DI_V2DI:
28280 /* Reserve memory operand for target. */
28281 memory = ARRAY_SIZE (args);
28283 case VOID_FTYPE_UINT_UINT_UINT:
28284 case VOID_FTYPE_UINT64_UINT_UINT:
28285 case UCHAR_FTYPE_UINT_UINT_UINT:
28286 case UCHAR_FTYPE_UINT64_UINT_UINT:
28289 memory = ARRAY_SIZE (args);
28290 last_arg_constant = true;
28293 gcc_unreachable ();
28296 gcc_assert (nargs <= ARRAY_SIZE (args));
28298 if (klass == store)
28300 arg = CALL_EXPR_ARG (exp, 0);
28301 op = expand_normal (arg);
28302 gcc_assert (target == 0);
28303 if (memory)
28304 {
28305 if (GET_MODE (op) != Pmode)
28306 op = convert_to_mode (Pmode, op, 1);
28307 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
28308 }
28309 else
28310 target = force_reg (tmode, op);
28311 arg_adjust = 1;
28312 }
28313 else
28314 {
28315 arg_adjust = 0;
28316 if (optimize
28317 || target == 0
28318 || GET_MODE (target) != tmode
28319 || !insn_p->operand[0].predicate (target, tmode))
28320 target = gen_reg_rtx (tmode);
28323 for (i = 0; i < nargs; i++)
28325 enum machine_mode mode = insn_p->operand[i + 1].mode;
28328 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
28329 op = expand_normal (arg);
28330 match = insn_p->operand[i + 1].predicate (op, mode);
28332 if (last_arg_constant && (i + 1) == nargs)
28336 if (icode == CODE_FOR_lwp_lwpvalsi3
28337 || icode == CODE_FOR_lwp_lwpinssi3
28338 || icode == CODE_FOR_lwp_lwpvaldi3
28339 || icode == CODE_FOR_lwp_lwpinsdi3)
28340 error ("the last argument must be a 32-bit immediate");
28342 error ("the last argument must be an 8-bit immediate");
28350 /* This must be the memory operand. */
28351 if (GET_MODE (op) != Pmode)
28352 op = convert_to_mode (Pmode, op, 1);
28353 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
28354 gcc_assert (GET_MODE (op) == mode
28355 || GET_MODE (op) == VOIDmode);
28359 /* This must be a register.  */
28360 if (VECTOR_MODE_P (mode))
28361 op = safe_vector_operand (op, mode);
28363 gcc_assert (GET_MODE (op) == mode
28364 || GET_MODE (op) == VOIDmode);
28365 op = copy_to_mode_reg (mode, op);
28369 args[i].op = op;
28370 args[i].mode = mode;
28371 }
28373 switch (nargs)
28374 {
28375 case 0:
28376 pat = GEN_FCN (icode) (target);
28377 break;
28378 case 1:
28379 pat = GEN_FCN (icode) (target, args[0].op);
28380 break;
28381 case 2:
28382 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
28383 break;
28384 case 3:
28385 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
28386 break;
28387 default:
28388 gcc_unreachable ();
28389 }
28391 if (! pat)
28392 return 0;
28393 emit_insn (pat);
28394 return klass == store ? 0 : target;
28395 }
28397 /* Return the integer constant in ARG. Constrain it to be in the range
28398 of the subparts of VEC_TYPE; issue an error if not. */
28401 get_element_number (tree vec_type, tree arg)
28403 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
28405 if (!host_integerp (arg, 1)
28406 || (elt = tree_low_cst (arg, 1), elt > max))
28408 error ("selector must be an integer constant in the range 0..%wi", max);
28409 return 0;
28410 }
28412 return elt;
28413 }
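/* E.g. __builtin_ia32_vec_ext_v4sf has TYPE_VECTOR_SUBPARTS == 4, so MAX
   is 3 and any selector above that is diagnosed here.  */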
28415 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
28416 ix86_expand_vector_init. We DO have language-level syntax for this, in
28417 the form of (type){ init-list }. Except that since we can't place emms
28418 instructions from inside the compiler, we can't allow the use of MMX
28419 registers unless the user explicitly asks for it. So we do *not* define
28420 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
28421 we have builtins invoked by mmintrin.h that give us license to emit
28422 these sorts of instructions. */
28425 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
28427 enum machine_mode tmode = TYPE_MODE (type);
28428 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
28429 int i, n_elt = GET_MODE_NUNITS (tmode);
28430 rtvec v = rtvec_alloc (n_elt);
28432 gcc_assert (VECTOR_MODE_P (tmode));
28433 gcc_assert (call_expr_nargs (exp) == n_elt);
28435 for (i = 0; i < n_elt; ++i)
28437 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
28438 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
28441 if (!target || !register_operand (target, tmode))
28442 target = gen_reg_rtx (tmode);
28444 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
28445 return target;
28446 }
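/* A user-level sketch of the wrapper scheme described above, assuming the
   _mm_set_pi32 definition in GCC's mmintrin.h.  */
#if 0
#include <mmintrin.h>

__m64
make_pair (int hi, int lo)
{
  /* mmintrin.h implements this as
     (__m64) __builtin_ia32_vec_init_v2si (lo, hi);  */
  return _mm_set_pi32 (hi, lo);
}
#endif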
28448 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
28449 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
28450 had a language-level syntax for referencing vector elements. */
28453 ix86_expand_vec_ext_builtin (tree exp, rtx target)
28455 enum machine_mode tmode, mode0;
28460 arg0 = CALL_EXPR_ARG (exp, 0);
28461 arg1 = CALL_EXPR_ARG (exp, 1);
28463 op0 = expand_normal (arg0);
28464 elt = get_element_number (TREE_TYPE (arg0), arg1);
28466 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
28467 mode0 = TYPE_MODE (TREE_TYPE (arg0));
28468 gcc_assert (VECTOR_MODE_P (mode0));
28470 op0 = force_reg (mode0, op0);
28472 if (optimize || !target || !register_operand (target, tmode))
28473 target = gen_reg_rtx (tmode);
28475 ix86_expand_vector_extract (true, target, op0, elt);
28477 return target;
28478 }
28480 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
28481 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
28482 a language-level syntax for referencing vector elements. */
28485 ix86_expand_vec_set_builtin (tree exp)
28487 enum machine_mode tmode, mode1;
28488 tree arg0, arg1, arg2;
28490 rtx op0, op1, target;
28492 arg0 = CALL_EXPR_ARG (exp, 0);
28493 arg1 = CALL_EXPR_ARG (exp, 1);
28494 arg2 = CALL_EXPR_ARG (exp, 2);
28496 tmode = TYPE_MODE (TREE_TYPE (arg0));
28497 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
28498 gcc_assert (VECTOR_MODE_P (tmode));
28500 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
28501 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
28502 elt = get_element_number (TREE_TYPE (arg0), arg2);
28504 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
28505 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
28507 op0 = force_reg (tmode, op0);
28508 op1 = force_reg (mode1, op1);
28510 /* OP0 is the source of these builtin functions and shouldn't be
28511 modified. Create a copy, use it and return it as target. */
28512 target = gen_reg_rtx (tmode);
28513 emit_move_insn (target, op0);
28514 ix86_expand_vector_set (true, target, op1, elt);
28516 return target;
28517 }
28519 /* Expand an expression EXP that calls a built-in function,
28520 with result going to TARGET if that's convenient
28521 (and in mode MODE if that's convenient).
28522 SUBTARGET may be used as the target for computing one of EXP's operands.
28523 IGNORE is nonzero if the value is to be ignored. */
28526 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
28527 enum machine_mode mode ATTRIBUTE_UNUSED,
28528 int ignore ATTRIBUTE_UNUSED)
28530 const struct builtin_description *d;
28532 enum insn_code icode;
28533 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
28534 tree arg0, arg1, arg2, arg3, arg4;
28535 rtx op0, op1, op2, op3, op4, pat;
28536 enum machine_mode mode0, mode1, mode2, mode3, mode4;
28537 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
28539 /* Determine whether the builtin function is available under the current ISA.
28540 Originally the builtin was not created if it wasn't applicable to the
28541 current ISA based on the command line switches. With function specific
28542 options, we need to check in the context of the function making the call
28543 whether it is supported. */
28544 if (ix86_builtins_isa[fcode].isa
28545 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
28547 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
28548 NULL, (enum fpmath_unit) 0, false);
28550 if (!opts)
28551 error ("%qE needs unknown isa option", fndecl);
28552 else
28553 {
28554 gcc_assert (opts != NULL);
28555 error ("%qE needs isa option %s", fndecl, opts);
28556 free (opts);
28557 }
28559 return const0_rtx;
28561 switch (fcode)
28562 {
28563 case IX86_BUILTIN_MASKMOVQ:
28564 case IX86_BUILTIN_MASKMOVDQU:
28565 icode = (fcode == IX86_BUILTIN_MASKMOVQ
28566 ? CODE_FOR_mmx_maskmovq
28567 : CODE_FOR_sse2_maskmovdqu);
28568 /* Note the arg order is different from the operand order. */
28569 arg1 = CALL_EXPR_ARG (exp, 0);
28570 arg2 = CALL_EXPR_ARG (exp, 1);
28571 arg0 = CALL_EXPR_ARG (exp, 2);
28572 op0 = expand_normal (arg0);
28573 op1 = expand_normal (arg1);
28574 op2 = expand_normal (arg2);
28575 mode0 = insn_data[icode].operand[0].mode;
28576 mode1 = insn_data[icode].operand[1].mode;
28577 mode2 = insn_data[icode].operand[2].mode;
28579 if (GET_MODE (op0) != Pmode)
28580 op0 = convert_to_mode (Pmode, op0, 1);
28581 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
28583 if (!insn_data[icode].operand[0].predicate (op0, mode0))
28584 op0 = copy_to_mode_reg (mode0, op0);
28585 if (!insn_data[icode].operand[1].predicate (op1, mode1))
28586 op1 = copy_to_mode_reg (mode1, op1);
28587 if (!insn_data[icode].operand[2].predicate (op2, mode2))
28588 op2 = copy_to_mode_reg (mode2, op2);
28589 pat = GEN_FCN (icode) (op0, op1, op2);
28590 if (! pat)
28591 return 0;
28592 emit_insn (pat);
28593 return 0;
28595 case IX86_BUILTIN_LDMXCSR:
28596 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
28597 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
28598 emit_move_insn (target, op0);
28599 emit_insn (gen_sse_ldmxcsr (target));
28602 case IX86_BUILTIN_STMXCSR:
28603 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
28604 emit_insn (gen_sse_stmxcsr (target));
28605 return copy_to_mode_reg (SImode, target);
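/* Editorial note (added commentary): LDMXCSR and STMXCSR only accept
   memory operands, hence the stack slot above.  A sketch using the
   intrinsics these builtins implement:

       #include <xmmintrin.h>
       unsigned int csr = _mm_getcsr ();
       _mm_setcsr (csr | 0x8040);

   _mm_getcsr expands via gen_sse_stmxcsr; the second line sets the
   flush-to-zero (0x8000) and denormals-are-zero (0x0040) bits and
   expands via gen_sse_ldmxcsr.  */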
28607 case IX86_BUILTIN_CLFLUSH:
28608 arg0 = CALL_EXPR_ARG (exp, 0);
28609 op0 = expand_normal (arg0);
28610 icode = CODE_FOR_sse2_clflush;
28611 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
28613 if (GET_MODE (op0) != Pmode)
28614 op0 = convert_to_mode (Pmode, op0, 1);
28615 op0 = force_reg (Pmode, op0);
28618 emit_insn (gen_sse2_clflush (op0));
28621 case IX86_BUILTIN_MONITOR:
28622 arg0 = CALL_EXPR_ARG (exp, 0);
28623 arg1 = CALL_EXPR_ARG (exp, 1);
28624 arg2 = CALL_EXPR_ARG (exp, 2);
28625 op0 = expand_normal (arg0);
28626 op1 = expand_normal (arg1);
28627 op2 = expand_normal (arg2);
28630 if (GET_MODE (op0) != Pmode)
28631 op0 = convert_to_mode (Pmode, op0, 1);
28632 op0 = force_reg (Pmode, op0);
28635 op1 = copy_to_mode_reg (SImode, op1);
28637 op2 = copy_to_mode_reg (SImode, op2);
28638 emit_insn (ix86_gen_monitor (op0, op1, op2));
28641 case IX86_BUILTIN_MWAIT:
28642 arg0 = CALL_EXPR_ARG (exp, 0);
28643 arg1 = CALL_EXPR_ARG (exp, 1);
28644 op0 = expand_normal (arg0);
28645 op1 = expand_normal (arg1);
28647 op0 = copy_to_mode_reg (SImode, op0);
28649 op1 = copy_to_mode_reg (SImode, op1);
28650 emit_insn (gen_sse3_mwait (op0, op1));
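/* Editorial note (added commentary): MONITOR arms address monitoring
   on the cache line containing its pointer argument, and a following
   MWAIT idles the core until that line is written or an interrupt
   arrives.  A sketch via the corresponding intrinsics, with FLAG a
   hypothetical shared variable:

       #include <pmmintrin.h>
       extern volatile int flag;
       _mm_monitor ((const void *) &flag, 0, 0);
       if (!flag)
         _mm_mwait (0, 0);
*/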
28653 case IX86_BUILTIN_VEC_INIT_V2SI:
28654 case IX86_BUILTIN_VEC_INIT_V4HI:
28655 case IX86_BUILTIN_VEC_INIT_V8QI:
28656 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
28658 case IX86_BUILTIN_VEC_EXT_V2DF:
28659 case IX86_BUILTIN_VEC_EXT_V2DI:
28660 case IX86_BUILTIN_VEC_EXT_V4SF:
28661 case IX86_BUILTIN_VEC_EXT_V4SI:
28662 case IX86_BUILTIN_VEC_EXT_V8HI:
28663 case IX86_BUILTIN_VEC_EXT_V2SI:
28664 case IX86_BUILTIN_VEC_EXT_V4HI:
28665 case IX86_BUILTIN_VEC_EXT_V16QI:
28666 return ix86_expand_vec_ext_builtin (exp, target);
28668 case IX86_BUILTIN_VEC_SET_V2DI:
28669 case IX86_BUILTIN_VEC_SET_V4SF:
28670 case IX86_BUILTIN_VEC_SET_V4SI:
28671 case IX86_BUILTIN_VEC_SET_V8HI:
28672 case IX86_BUILTIN_VEC_SET_V4HI:
28673 case IX86_BUILTIN_VEC_SET_V16QI:
28674 return ix86_expand_vec_set_builtin (exp);
28676 case IX86_BUILTIN_INFQ:
28677 case IX86_BUILTIN_HUGE_VALQ:
28679 REAL_VALUE_TYPE inf;
28680 rtx tmp;
28682 real_inf (&inf);
28683 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
28685 tmp = validize_mem (force_const_mem (mode, tmp));
28688 target = gen_reg_rtx (mode);
28690 emit_move_insn (target, tmp);
28694 case IX86_BUILTIN_LLWPCB:
28695 arg0 = CALL_EXPR_ARG (exp, 0);
28696 op0 = expand_normal (arg0);
28697 icode = CODE_FOR_lwp_llwpcb;
28698 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
28700 if (GET_MODE (op0) != Pmode)
28701 op0 = convert_to_mode (Pmode, op0, 1);
28702 op0 = force_reg (Pmode, op0);
28704 emit_insn (gen_lwp_llwpcb (op0));
28707 case IX86_BUILTIN_SLWPCB:
28708 icode = CODE_FOR_lwp_slwpcb;
28710 || !insn_data[icode].operand[0].predicate (target, Pmode))
28711 target = gen_reg_rtx (Pmode);
28712 emit_insn (gen_lwp_slwpcb (target));
28715 case IX86_BUILTIN_BEXTRI32:
28716 case IX86_BUILTIN_BEXTRI64:
28717 arg0 = CALL_EXPR_ARG (exp, 0);
28718 arg1 = CALL_EXPR_ARG (exp, 1);
28719 op0 = expand_normal (arg0);
28720 op1 = expand_normal (arg1);
28721 icode = (fcode == IX86_BUILTIN_BEXTRI32
28722 ? CODE_FOR_tbm_bextri_si
28723 : CODE_FOR_tbm_bextri_di);
28724 if (!CONST_INT_P (op1))
28726 error ("last argument must be an immediate");
28731 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
28732 unsigned char lsb_index = INTVAL (op1) & 0xFF;
28733 op1 = GEN_INT (length);
28734 op2 = GEN_INT (lsb_index);
28735 pat = GEN_FCN (icode) (target, op0, op1, op2);
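/* Editorial example (added commentary): the TBM BEXTRI immediate
   packs its field as (length << 8) | lsb_index, so

       __builtin_ia32_bextri_u32 (x, 0x0804)

   extracts 8 bits of X starting at bit 4; the split above hands the
   two bytes to the insn pattern as separate operands.  */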
28741 case IX86_BUILTIN_RDRAND16_STEP:
28742 icode = CODE_FOR_rdrandhi_1;
28746 case IX86_BUILTIN_RDRAND32_STEP:
28747 icode = CODE_FOR_rdrandsi_1;
28751 case IX86_BUILTIN_RDRAND64_STEP:
28752 icode = CODE_FOR_rdranddi_1;
28756 op0 = gen_reg_rtx (mode0);
28757 emit_insn (GEN_FCN (icode) (op0));
28759 arg0 = CALL_EXPR_ARG (exp, 0);
28760 op1 = expand_normal (arg0);
28761 if (!address_operand (op1, VOIDmode))
28763 op1 = convert_memory_address (Pmode, op1);
28764 op1 = copy_addr_to_reg (op1);
28766 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
28768 op1 = gen_reg_rtx (SImode);
28769 emit_move_insn (op1, CONST1_RTX (SImode));
28771 /* Emit SImode conditional move. */
28772 if (mode0 == HImode)
28774 op2 = gen_reg_rtx (SImode);
28775 emit_insn (gen_zero_extendhisi2 (op2, op0));
28777 else if (mode0 == SImode)
28780 op2 = gen_rtx_SUBREG (SImode, op0, 0);
28783 target = gen_reg_rtx (SImode);
28785 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
28786 const0_rtx);
28787 emit_insn (gen_rtx_SET (VOIDmode, target,
28788 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
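/* Editorial note (added commentary): RDRAND sets CF and stores a
   random value on success; on failure it clears CF and zeroes its
   destination, so the conditional move above yields 1 on success and
   the zeroed result (0) otherwise.  Usage sketch:

       #include <immintrin.h>
       unsigned short r;
       int ok = _rdrand16_step (&r);

   OK is 1 iff R now holds a valid random value.  */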
28791 case IX86_BUILTIN_GATHERSIV2DF:
28792 icode = CODE_FOR_avx2_gathersiv2df;
28794 case IX86_BUILTIN_GATHERSIV4DF:
28795 icode = CODE_FOR_avx2_gathersiv4df;
28797 case IX86_BUILTIN_GATHERDIV2DF:
28798 icode = CODE_FOR_avx2_gatherdiv2df;
28800 case IX86_BUILTIN_GATHERDIV4DF:
28801 icode = CODE_FOR_avx2_gatherdiv4df;
28803 case IX86_BUILTIN_GATHERSIV4SF:
28804 icode = CODE_FOR_avx2_gathersiv4sf;
28806 case IX86_BUILTIN_GATHERSIV8SF:
28807 icode = CODE_FOR_avx2_gathersiv8sf;
28809 case IX86_BUILTIN_GATHERDIV4SF:
28810 icode = CODE_FOR_avx2_gatherdiv4sf;
28812 case IX86_BUILTIN_GATHERDIV8SF:
28813 icode = CODE_FOR_avx2_gatherdiv4sf256;
28815 case IX86_BUILTIN_GATHERSIV2DI:
28816 icode = CODE_FOR_avx2_gathersiv2di;
28818 case IX86_BUILTIN_GATHERSIV4DI:
28819 icode = CODE_FOR_avx2_gathersiv4di;
28821 case IX86_BUILTIN_GATHERDIV2DI:
28822 icode = CODE_FOR_avx2_gatherdiv2di;
28824 case IX86_BUILTIN_GATHERDIV4DI:
28825 icode = CODE_FOR_avx2_gatherdiv4di;
28827 case IX86_BUILTIN_GATHERSIV4SI:
28828 icode = CODE_FOR_avx2_gathersiv4si;
28830 case IX86_BUILTIN_GATHERSIV8SI:
28831 icode = CODE_FOR_avx2_gathersiv8si;
28833 case IX86_BUILTIN_GATHERDIV4SI:
28834 icode = CODE_FOR_avx2_gatherdiv4si;
28836 case IX86_BUILTIN_GATHERDIV8SI:
28837 icode = CODE_FOR_avx2_gatherdiv4si256;
28840 arg0 = CALL_EXPR_ARG (exp, 0);
28841 arg1 = CALL_EXPR_ARG (exp, 1);
28842 arg2 = CALL_EXPR_ARG (exp, 2);
28843 arg3 = CALL_EXPR_ARG (exp, 3);
28844 arg4 = CALL_EXPR_ARG (exp, 4);
28845 op0 = expand_normal (arg0);
28846 op1 = expand_normal (arg1);
28847 op2 = expand_normal (arg2);
28848 op3 = expand_normal (arg3);
28849 op4 = expand_normal (arg4);
28850 /* Note the arg order is different from the operand order. */
28851 mode0 = insn_data[icode].operand[1].mode;
28852 mode2 = insn_data[icode].operand[3].mode;
28853 mode3 = insn_data[icode].operand[4].mode;
28854 mode4 = insn_data[icode].operand[5].mode;
28856 if (target == NULL_RTX)
28857 target = gen_reg_rtx (insn_data[icode].operand[0].mode);
28859 /* Force memory operand only with base register here. But we
28860 don't want to do it on memory operand for other builtin
28861 functions. */
28862 if (GET_MODE (op1) != Pmode)
28863 op1 = convert_to_mode (Pmode, op1, 1);
28864 op1 = force_reg (Pmode, op1);
28866 if (!insn_data[icode].operand[1].predicate (op0, mode0))
28867 op0 = copy_to_mode_reg (mode0, op0);
28868 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
28869 op1 = copy_to_mode_reg (Pmode, op1);
28870 if (!insn_data[icode].operand[3].predicate (op2, mode2))
28871 op2 = copy_to_mode_reg (mode2, op2);
28872 if (!insn_data[icode].operand[4].predicate (op3, mode3))
28873 op3 = copy_to_mode_reg (mode3, op3);
28874 if (!insn_data[icode].operand[5].predicate (op4, mode4))
28876 error ("last argument must be scale 1, 2, 4, 8");
28879 pat = GEN_FCN (icode) (target, op0, op1, op2, op3, op4);
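/* Editorial example (added commentary): an AVX2 gather intrinsic
   such as

       #include <immintrin.h>
       __m256d g = _mm256_i32gather_pd (base, vindex, 8);

   (BASE and VINDEX being placeholder user values) arrives here with
   the base pointer, index vector, mask and scale as separate
   operands; the VSIB addressing mode only encodes scales of 1, 2, 4
   and 8, hence the predicate check above.  */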
28889 for (i = 0, d = bdesc_special_args;
28890 i < ARRAY_SIZE (bdesc_special_args);
28891 i++, d++)
28892 if (d->code == fcode)
28893 return ix86_expand_special_args_builtin (d, exp, target);
28895 for (i = 0, d = bdesc_args;
28896 i < ARRAY_SIZE (bdesc_args);
28897 i++, d++)
28898 if (d->code == fcode)
28899 switch (fcode)
28901 case IX86_BUILTIN_FABSQ:
28902 case IX86_BUILTIN_COPYSIGNQ:
28903 if (!TARGET_SSE2)
28904 /* Emit a normal call if SSE2 isn't available. */
28905 return expand_call (exp, target, ignore);
28906 default:
28907 return ix86_expand_args_builtin (d, exp, target);
28910 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
28911 if (d->code == fcode)
28912 return ix86_expand_sse_comi (d, exp, target);
28914 for (i = 0, d = bdesc_pcmpestr;
28915 i < ARRAY_SIZE (bdesc_pcmpestr);
28916 i++, d++)
28917 if (d->code == fcode)
28918 return ix86_expand_sse_pcmpestr (d, exp, target);
28920 for (i = 0, d = bdesc_pcmpistr;
28921 i < ARRAY_SIZE (bdesc_pcmpistr);
28922 i++, d++)
28923 if (d->code == fcode)
28924 return ix86_expand_sse_pcmpistr (d, exp, target);
28926 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
28927 if (d->code == fcode)
28928 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
28929 (enum ix86_builtin_func_type)
28930 d->flag, d->comparison);
28932 gcc_unreachable ();
28935 /* Returns a function decl for a vectorized version of the builtin function
28936 with builtin function code FN and the result vector type TYPE, or NULL_TREE
28937 if it is not available. */
28940 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
28941 tree type_in)
28943 enum machine_mode in_mode, out_mode;
28944 int in_n, out_n;
28945 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
28947 if (TREE_CODE (type_out) != VECTOR_TYPE
28948 || TREE_CODE (type_in) != VECTOR_TYPE
28949 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
28952 out_mode = TYPE_MODE (TREE_TYPE (type_out));
28953 out_n = TYPE_VECTOR_SUBPARTS (type_out);
28954 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28955 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28959 case BUILT_IN_SQRT:
28960 if (out_mode == DFmode && in_mode == DFmode)
28962 if (out_n == 2 && in_n == 2)
28963 return ix86_builtins[IX86_BUILTIN_SQRTPD];
28964 else if (out_n == 4 && in_n == 4)
28965 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
28969 case BUILT_IN_SQRTF:
28970 if (out_mode == SFmode && in_mode == SFmode)
28972 if (out_n == 4 && in_n == 4)
28973 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
28974 else if (out_n == 8 && in_n == 8)
28975 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
28979 case BUILT_IN_LRINT:
28980 if (out_mode == SImode && out_n == 4
28981 && in_mode == DFmode && in_n == 2)
28982 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
28985 case BUILT_IN_LRINTF:
28986 if (out_mode == SImode && in_mode == SFmode)
28988 if (out_n == 4 && in_n == 4)
28989 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
28990 else if (out_n == 8 && in_n == 8)
28991 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
28995 case BUILT_IN_COPYSIGN:
28996 if (out_mode == DFmode && in_mode == DFmode)
28998 if (out_n == 2 && in_n == 2)
28999 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
29000 else if (out_n == 4 && in_n == 4)
29001 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
29005 case BUILT_IN_COPYSIGNF:
29006 if (out_mode == SFmode && in_mode == SFmode)
29008 if (out_n == 4 && in_n == 4)
29009 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
29010 else if (out_n == 8 && in_n == 8)
29011 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
29015 case BUILT_IN_FLOOR:
29016 /* The round insn does not trap on denormals. */
29017 if (flag_trapping_math || !TARGET_ROUND)
29020 if (out_mode == DFmode && in_mode == DFmode)
29022 if (out_n == 2 && in_n == 2)
29023 return ix86_builtins[IX86_BUILTIN_FLOORPD];
29024 else if (out_n == 4 && in_n == 4)
29025 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
29029 case BUILT_IN_FLOORF:
29030 /* The round insn does not trap on denormals. */
29031 if (flag_trapping_math || !TARGET_ROUND)
29034 if (out_mode == SFmode && in_mode == SFmode)
29036 if (out_n == 4 && in_n == 4)
29037 return ix86_builtins[IX86_BUILTIN_FLOORPS];
29038 else if (out_n == 8 && in_n == 8)
29039 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
29043 case BUILT_IN_CEIL:
29044 /* The round insn does not trap on denormals. */
29045 if (flag_trapping_math || !TARGET_ROUND)
29048 if (out_mode == DFmode && in_mode == DFmode)
29050 if (out_n == 2 && in_n == 2)
29051 return ix86_builtins[IX86_BUILTIN_CEILPD];
29052 else if (out_n == 4 && in_n == 4)
29053 return ix86_builtins[IX86_BUILTIN_CEILPD256];
29057 case BUILT_IN_CEILF:
29058 /* The round insn does not trap on denormals. */
29059 if (flag_trapping_math || !TARGET_ROUND)
29062 if (out_mode == SFmode && in_mode == SFmode)
29064 if (out_n == 4 && in_n == 4)
29065 return ix86_builtins[IX86_BUILTIN_CEILPS];
29066 else if (out_n == 8 && in_n == 8)
29067 return ix86_builtins[IX86_BUILTIN_CEILPS256];
29071 case BUILT_IN_TRUNC:
29072 /* The round insn does not trap on denormals. */
29073 if (flag_trapping_math || !TARGET_ROUND)
29076 if (out_mode == DFmode && in_mode == DFmode)
29078 if (out_n == 2 && in_n == 2)
29079 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
29080 else if (out_n == 4 && in_n == 4)
29081 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
29085 case BUILT_IN_TRUNCF:
29086 /* The round insn does not trap on denormals. */
29087 if (flag_trapping_math || !TARGET_ROUND)
29090 if (out_mode == SFmode && in_mode == SFmode)
29092 if (out_n == 4 && in_n == 4)
29093 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
29094 else if (out_n == 8 && in_n == 8)
29095 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
29099 case BUILT_IN_RINT:
29100 /* The round insn does not trap on denormals. */
29101 if (flag_trapping_math || !TARGET_ROUND)
29104 if (out_mode == DFmode && in_mode == DFmode)
29106 if (out_n == 2 && in_n == 2)
29107 return ix86_builtins[IX86_BUILTIN_RINTPD];
29108 else if (out_n == 4 && in_n == 4)
29109 return ix86_builtins[IX86_BUILTIN_RINTPD256];
29113 case BUILT_IN_RINTF:
29114 /* The round insn does not trap on denormals. */
29115 if (flag_trapping_math || !TARGET_ROUND)
29118 if (out_mode == SFmode && in_mode == SFmode)
29120 if (out_n == 4 && in_n == 4)
29121 return ix86_builtins[IX86_BUILTIN_RINTPS];
29122 else if (out_n == 8 && in_n == 8)
29123 return ix86_builtins[IX86_BUILTIN_RINTPS256];
29127 case BUILT_IN_ROUND:
29128 /* The round insn does not trap on denormals. */
29129 if (flag_trapping_math || !TARGET_ROUND)
29132 if (out_mode == DFmode && in_mode == DFmode)
29134 if (out_n == 2 && in_n == 2)
29135 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
29136 else if (out_n == 4 && in_n == 4)
29137 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
29141 case BUILT_IN_ROUNDF:
29142 /* The round insn does not trap on denormals. */
29143 if (flag_trapping_math || !TARGET_ROUND)
29146 if (out_mode == SFmode && in_mode == SFmode)
29148 if (out_n == 4 && in_n == 4)
29149 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
29150 else if (out_n == 8 && in_n == 8)
29151 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
29156 if (out_mode == DFmode && in_mode == DFmode)
29158 if (out_n == 2 && in_n == 2)
29159 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
29160 if (out_n == 4 && in_n == 4)
29161 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
29165 case BUILT_IN_FMAF:
29166 if (out_mode == SFmode && in_mode == SFmode)
29168 if (out_n == 4 && in_n == 4)
29169 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
29170 if (out_n == 8 && in_n == 8)
29171 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
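/* Editorial example (added commentary): vectorizing a loop of
   sqrt () calls over doubles with AVX enabled reaches the
   BUILT_IN_SQRT case above with out_mode == DFmode and out_n == 4,
   returning the decl for IX86_BUILTIN_SQRTPD256 (a 256-bit
   vsqrtpd).  */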
29179 /* Dispatch to a handler for a vectorization library. */
29180 if (ix86_veclib_handler)
29181 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
29182 type_in);
29187 /* Handler for an SVML-style interface to
29188 a library with vectorized intrinsics. */
29191 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
29193 char name[20];
29194 tree fntype, new_fndecl, args;
29195 unsigned arity;
29196 const char *bname;
29197 enum machine_mode el_mode, in_mode;
29198 int n, in_n;
29200 /* The SVML is suitable for unsafe math only. */
29201 if (!flag_unsafe_math_optimizations)
29204 el_mode = TYPE_MODE (TREE_TYPE (type_out));
29205 n = TYPE_VECTOR_SUBPARTS (type_out);
29206 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29207 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29208 if (el_mode != in_mode
29216 case BUILT_IN_LOG10:
29218 case BUILT_IN_TANH:
29220 case BUILT_IN_ATAN:
29221 case BUILT_IN_ATAN2:
29222 case BUILT_IN_ATANH:
29223 case BUILT_IN_CBRT:
29224 case BUILT_IN_SINH:
29226 case BUILT_IN_ASINH:
29227 case BUILT_IN_ASIN:
29228 case BUILT_IN_COSH:
29230 case BUILT_IN_ACOSH:
29231 case BUILT_IN_ACOS:
29232 if (el_mode != DFmode || n != 2)
29236 case BUILT_IN_EXPF:
29237 case BUILT_IN_LOGF:
29238 case BUILT_IN_LOG10F:
29239 case BUILT_IN_POWF:
29240 case BUILT_IN_TANHF:
29241 case BUILT_IN_TANF:
29242 case BUILT_IN_ATANF:
29243 case BUILT_IN_ATAN2F:
29244 case BUILT_IN_ATANHF:
29245 case BUILT_IN_CBRTF:
29246 case BUILT_IN_SINHF:
29247 case BUILT_IN_SINF:
29248 case BUILT_IN_ASINHF:
29249 case BUILT_IN_ASINF:
29250 case BUILT_IN_COSHF:
29251 case BUILT_IN_COSF:
29252 case BUILT_IN_ACOSHF:
29253 case BUILT_IN_ACOSF:
29254 if (el_mode != SFmode || n != 4)
29262 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
29264 if (fn == BUILT_IN_LOGF)
29265 strcpy (name, "vmlsLn4");
29266 else if (fn == BUILT_IN_LOG)
29267 strcpy (name, "vmldLn2");
29270 sprintf (name, "vmls%s", bname+10);
29271 name[strlen (name)-1] = '4';
29274 sprintf (name, "vmld%s2", bname+10);
29276 /* Convert to uppercase. */
29277 name[4] &= ~0x20;
29280 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
29282 args = TREE_CHAIN (args))
29286 fntype = build_function_type_list (type_out, type_in, NULL);
29288 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
29290 /* Build a function declaration for the vectorized function. */
29291 new_fndecl = build_decl (BUILTINS_LOCATION,
29292 FUNCTION_DECL, get_identifier (name), fntype);
29293 TREE_PUBLIC (new_fndecl) = 1;
29294 DECL_EXTERNAL (new_fndecl) = 1;
29295 DECL_IS_NOVOPS (new_fndecl) = 1;
29296 TREE_READONLY (new_fndecl) = 1;
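/* Editorial example (added commentary) of the SVML mangling above:
   BUILT_IN_SINF on V4SFmode becomes "vmlsSin4" and BUILT_IN_SIN on
   V2DFmode becomes "vmldSin2"; the "s"/"d" selects single or double
   precision and the trailing digit is the vector width.  */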
29301 /* Handler for an ACML-style interface to
29302 a library with vectorized intrinsics. */
29305 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
29307 char name[20] = "__vr.._";
29308 tree fntype, new_fndecl, args;
29309 unsigned arity;
29310 const char *bname;
29311 enum machine_mode el_mode, in_mode;
29312 int n, in_n;
29314 /* The ACML is 64-bit only and suitable for unsafe math only, as
29315 it does not correctly support parts of IEEE with the required
29316 precision, such as denormals. */
29318 || !flag_unsafe_math_optimizations)
29321 el_mode = TYPE_MODE (TREE_TYPE (type_out));
29322 n = TYPE_VECTOR_SUBPARTS (type_out);
29323 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29324 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29325 if (el_mode != in_mode
29335 case BUILT_IN_LOG2:
29336 case BUILT_IN_LOG10:
29339 if (el_mode != DFmode
29344 case BUILT_IN_SINF:
29345 case BUILT_IN_COSF:
29346 case BUILT_IN_EXPF:
29347 case BUILT_IN_POWF:
29348 case BUILT_IN_LOGF:
29349 case BUILT_IN_LOG2F:
29350 case BUILT_IN_LOG10F:
29353 if (el_mode != SFmode
29362 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
29363 sprintf (name + 7, "%s", bname+10);
29366 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
29368 args = TREE_CHAIN (args))
29372 fntype = build_function_type_list (type_out, type_in, NULL);
29374 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
29376 /* Build a function declaration for the vectorized function. */
29377 new_fndecl = build_decl (BUILTINS_LOCATION,
29378 FUNCTION_DECL, get_identifier (name), fntype);
29379 TREE_PUBLIC (new_fndecl) = 1;
29380 DECL_EXTERNAL (new_fndecl) = 1;
29381 DECL_IS_NOVOPS (new_fndecl) = 1;
29382 TREE_READONLY (new_fndecl) = 1;
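/* Editorial example (added commentary) of the ACML mangling above:
   BUILT_IN_SIN on V2DFmode becomes "__vrd2_sin" and BUILT_IN_SINF on
   V4SFmode becomes "__vrs4_sinf".  */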
29388 /* Returns a decl of a function that implements conversion of an integer vector
29389 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
29390 are the types involved when converting according to CODE.
29391 Return NULL_TREE if it is not available. */
29394 ix86_vectorize_builtin_conversion (unsigned int code,
29395 tree dest_type, tree src_type)
29403 switch (TYPE_MODE (src_type))
29406 switch (TYPE_MODE (dest_type))
29409 return (TYPE_UNSIGNED (src_type)
29410 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
29411 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
29413 return (TYPE_UNSIGNED (src_type)
29415 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
29421 switch (TYPE_MODE (dest_type))
29424 return (TYPE_UNSIGNED (src_type)
29426 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
29435 case FIX_TRUNC_EXPR:
29436 switch (TYPE_MODE (dest_type))
29439 switch (TYPE_MODE (src_type))
29442 return (TYPE_UNSIGNED (dest_type)
29444 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
29446 return (TYPE_UNSIGNED (dest_type)
29448 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
29455 switch (TYPE_MODE (src_type))
29458 return (TYPE_UNSIGNED (dest_type)
29460 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
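/* Editorial example (added commentary): vectorizing an int -> float
   conversion gives FLOAT_EXPR with source mode V4SImode and
   destination mode V4SFmode, for which the code above returns the
   IX86_BUILTIN_CVTDQ2PS builtin (cvtdq2ps).  */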
29477 /* Returns a decl for a target-specific builtin that implements the
29478 reciprocal of the function, or NULL_TREE if not available. */
29481 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
29482 bool sqrt ATTRIBUTE_UNUSED)
29484 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
29485 && flag_finite_math_only && !flag_trapping_math
29486 && flag_unsafe_math_optimizations))
29490 /* Machine dependent builtins. */
29493 /* Vectorized version of sqrt to rsqrt conversion. */
29494 case IX86_BUILTIN_SQRTPS_NR:
29495 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
29497 case IX86_BUILTIN_SQRTPS_NR256:
29498 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
29504 /* Normal builtins. */
29507 /* Sqrt to rsqrt conversion. */
29508 case BUILT_IN_SQRTF:
29509 return ix86_builtins[IX86_BUILTIN_RSQRTF];
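/* Editorial example (added commentary): with -ffast-math,
   1.0f / sqrtf (x) can be rewritten in terms of IX86_BUILTIN_RSQRTF,
   i.e. the rsqrtss approximation (refined elsewhere by a
   Newton-Raphson step), instead of a full divide and square root.  */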
29516 /* Helper for avx_vpermilps256_operand et al. This is also used by
29517 the expansion functions to turn the parallel back into a mask.
29518 The return value is 0 for no match and the imm8+1 for a match. */
29521 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
29523 unsigned i, nelt = GET_MODE_NUNITS (mode);
29524 unsigned mask = 0;
29525 unsigned char ipar[8];
29527 if (XVECLEN (par, 0) != (int) nelt)
29530 /* Validate that all of the elements are constants, and not totally
29531 out of range. Copy the data into an integral array to make the
29532 subsequent checks easier. */
29533 for (i = 0; i < nelt; ++i)
29535 rtx er = XVECEXP (par, 0, i);
29536 unsigned HOST_WIDE_INT ei;
29538 if (!CONST_INT_P (er))
29549 /* In the 256-bit DFmode case, we can only move elements within
29550 a 128-bit lane. */
29551 for (i = 0; i < 2; ++i)
29555 mask |= ipar[i] << i;
29557 for (i = 2; i < 4; ++i)
29561 mask |= (ipar[i] - 2) << i;
29566 /* In the 256-bit SFmode case, we have full freedom of movement
29567 within the low 128-bit lane, but the high 128-bit lane must
29568 mirror the exact same pattern. */
29569 for (i = 0; i < 4; ++i)
29570 if (ipar[i] + 4 != ipar[i + 4])
29577 /* In the 128-bit case, we have full freedom in the placement of
29578 the elements from the source operand. */
29579 for (i = 0; i < nelt; ++i)
29580 mask |= ipar[i] << (i * (nelt / 2));
29584 gcc_unreachable ();
29587 /* Make sure success has a non-zero value by adding one. */
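/* Editorial example (added commentary): for V4SFmode the parallel
   [1 0 3 2] yields mask = 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1, and
   0xb1 + 1 is returned so that zero still means "no match".  */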
29591 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
29592 the expansion functions to turn the parallel back into a mask.
29593 The return value is 0 for no match and the imm8+1 for a match. */
29596 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
29598 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
29599 unsigned mask = 0;
29600 unsigned char ipar[8];
29602 if (XVECLEN (par, 0) != (int) nelt)
29605 /* Validate that all of the elements are constants, and not totally
29606 out of range. Copy the data into an integral array to make the
29607 subsequent checks easier. */
29608 for (i = 0; i < nelt; ++i)
29610 rtx er = XVECEXP (par, 0, i);
29611 unsigned HOST_WIDE_INT ei;
29613 if (!CONST_INT_P (er))
29616 if (ei >= 2 * nelt)
29621 /* Validate that each half of the permute selects consecutive elements. */
29622 for (i = 0; i < nelt2 - 1; ++i)
29623 if (ipar[i] + 1 != ipar[i + 1])
29625 for (i = nelt2; i < nelt - 1; ++i)
29626 if (ipar[i] + 1 != ipar[i + 1])
29629 /* Reconstruct the mask. */
29630 for (i = 0; i < 2; ++i)
29632 unsigned e = ipar[i * nelt2];
29636 mask |= e << (i * 4);
29639 /* Make sure success has a non-zero value by adding one. */
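/* Editorial example (added commentary): for V4DFmode the parallel
   [2 3 4 5] selects the high lane of the first operand and the low
   lane of the second, giving imm8 0x21 and a return value of
   0x22.  */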
29644 /* Store OPERAND to memory after reload is completed. This means
29645 that we can't easily use assign_stack_local. */
29647 ix86_force_to_memory (enum machine_mode mode, rtx operand)
29649 rtx result;
29651 gcc_assert (reload_completed);
29652 if (ix86_using_red_zone ())
29654 result = gen_rtx_MEM (mode,
29655 gen_rtx_PLUS (Pmode,
29656 stack_pointer_rtx,
29657 GEN_INT (-RED_ZONE_SIZE)));
29658 emit_move_insn (result, operand);
29660 else if (TARGET_64BIT)
29666 operand = gen_lowpart (DImode, operand);
29670 gen_rtx_SET (VOIDmode,
29671 gen_rtx_MEM (DImode,
29672 gen_rtx_PRE_DEC (DImode,
29673 stack_pointer_rtx)),
29677 gcc_unreachable ();
29679 result = gen_rtx_MEM (mode, stack_pointer_rtx);
29688 split_double_mode (mode, &operand, 1, operands, operands + 1);
29690 gen_rtx_SET (VOIDmode,
29691 gen_rtx_MEM (SImode,
29692 gen_rtx_PRE_DEC (Pmode,
29693 stack_pointer_rtx)),
29696 gen_rtx_SET (VOIDmode,
29697 gen_rtx_MEM (SImode,
29698 gen_rtx_PRE_DEC (Pmode,
29699 stack_pointer_rtx)),
29704 /* Store HImode values as SImode. */
29705 operand = gen_lowpart (SImode, operand);
29709 gen_rtx_SET (VOIDmode,
29710 gen_rtx_MEM (GET_MODE (operand),
29711 gen_rtx_PRE_DEC (SImode,
29712 stack_pointer_rtx)),
29716 gcc_unreachable ();
29718 result = gen_rtx_MEM (mode, stack_pointer_rtx);
29723 /* Free operand from the memory. */
29725 ix86_free_from_memory (enum machine_mode mode)
29727 if (!ix86_using_red_zone ())
29731 if (mode == DImode || TARGET_64BIT)
29735 /* Use LEA to deallocate stack space. In peephole2 it will be converted
29736 to a pop or add instruction if registers are available. */
29737 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
29738 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
29743 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
29745 Put float CONST_DOUBLE in the constant pool instead of fp regs.
29746 QImode must go into class Q_REGS.
29747 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
29748 movdf to do mem-to-mem moves through integer regs. */
29751 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
29753 enum machine_mode mode = GET_MODE (x);
29755 /* We're only allowed to return a subclass of CLASS. Many of the
29756 following checks fail for NO_REGS, so eliminate that early. */
29757 if (regclass == NO_REGS)
29760 /* All classes can load zeros. */
29761 if (x == CONST0_RTX (mode))
29764 /* Force constants into memory if we are loading a (nonzero) constant into
29765 an MMX or SSE register. This is because there are no MMX/SSE instructions
29766 to load from a constant. */
29768 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
29771 /* Prefer SSE regs only, if we can use them for math. */
29772 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
29773 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
29775 /* Floating-point constants need more complex checks. */
29776 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
29778 /* General regs can load everything. */
29779 if (reg_class_subset_p (regclass, GENERAL_REGS))
29782 /* Floats can load 0 and 1 plus some others. Note that we eliminated
29783 zero above. We only want to wind up preferring 80387 registers if
29784 we plan on doing computation with them. */
29786 && standard_80387_constant_p (x) > 0)
29788 /* Limit class to non-sse. */
29789 if (regclass == FLOAT_SSE_REGS)
29791 if (regclass == FP_TOP_SSE_REGS)
29793 if (regclass == FP_SECOND_SSE_REGS)
29794 return FP_SECOND_REG;
29795 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
29802 /* Generally when we see PLUS here, it's the function invariant
29803 (plus soft-fp const_int), which can only be computed into general
29804 regs. */
29805 if (GET_CODE (x) == PLUS)
29806 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
29808 /* QImode constants are easy to load, but non-constant QImode data
29809 must go into Q_REGS. */
29810 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
29812 if (reg_class_subset_p (regclass, Q_REGS))
29814 if (reg_class_subset_p (Q_REGS, regclass))
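/* Editorial note (added commentary): on 32-bit targets only %eax,
   %ebx, %ecx and %edx have addressable low-byte subregisters, which
   is why non-constant QImode data is steered into Q_REGS above.  */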
29822 /* Discourage putting floating-point values in SSE registers unless
29823 SSE math is being used, and likewise for the 387 registers. */
29825 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
29827 enum machine_mode mode = GET_MODE (x);
29829 /* Restrict the output reload class to the register bank that we are doing
29830 math on. If we would like not to return a subset of CLASS, reject this
29831 alternative: if reload cannot do this, it will still use its choice. */
29832 mode = GET_MODE (x);
29833 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
29834 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
29836 if (X87_FLOAT_MODE_P (mode))
29838 if (regclass == FP_TOP_SSE_REGS)
29840 else if (regclass == FP_SECOND_SSE_REGS)
29841 return FP_SECOND_REG;
29843 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
29850 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
29851 enum machine_mode mode, secondary_reload_info *sri)
29853 /* Double-word spills from general registers to non-offsettable memory
29854 references (zero-extended addresses) require special handling. */
29857 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
29858 && rclass == GENERAL_REGS
29859 && !offsettable_memref_p (x))
29862 ? CODE_FOR_reload_noff_load
29863 : CODE_FOR_reload_noff_store);
29864 /* Add the cost of moving address to a temporary. */
29865 sri->extra_cost = 1;
29870 /* QImode spills from non-QI registers require
29871 an intermediate register on 32-bit targets. */
29873 && !in_p && mode == QImode
29874 && (rclass == GENERAL_REGS
29875 || rclass == LEGACY_REGS
29876 || rclass == INDEX_REGS))
29885 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
29886 regno = true_regnum (x);
29888 /* Return Q_REGS if the operand is in memory. */
29893 /* This condition handles corner case where an expression involving
29894 pointers gets vectorized. We're trying to use the address of a
29895 stack slot as a vector initializer.
29897 (set (reg:V2DI 74 [ vect_cst_.2 ])
29898 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
29900 Eventually frame gets turned into sp+offset like this:
29902 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29903 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
29904 (const_int 392 [0x188]))))
29906 That later gets turned into:
29908 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29909 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
29910 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
29912 We'll have the following reload recorded:
29914 Reload 0: reload_in (DI) =
29915 (plus:DI (reg/f:DI 7 sp)
29916 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
29917 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29918 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
29919 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
29920 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29921 reload_reg_rtx: (reg:V2DI 22 xmm1)
29923 This isn't going to work since SSE instructions can't handle scalar
29924 additions. Returning GENERAL_REGS forces the addition into an integer
29925 register, and reload can handle subsequent reloads without problems. */
29927 if (in_p && GET_CODE (x) == PLUS
29928 && SSE_CLASS_P (rclass)
29929 && SCALAR_INT_MODE_P (mode))
29930 return GENERAL_REGS;
29935 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
29938 ix86_class_likely_spilled_p (reg_class_t rclass)
29949 case SSE_FIRST_REG:
29951 case FP_SECOND_REG:
29961 /* If we are copying between general and FP registers, we need a memory
29962 location. The same is true for SSE and MMX registers.
29964 To optimize register_move_cost performance, allow an inline variant.
29966 The macro can't work reliably when one of the CLASSES is a class containing
29967 registers from multiple units (SSE, MMX, integer). We avoid this by never
29968 combining those units in a single alternative in the machine description.
29969 Ensure that this constraint holds to avoid unexpected surprises.
29971 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
29972 enforce these sanity checks. */
29975 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
29976 enum machine_mode mode, int strict)
29978 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
29979 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
29980 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
29981 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
29982 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
29983 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
29985 gcc_assert (!strict);
29989 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
29992 /* ??? This is a lie. We do have moves between mmx/general and between
29993 mmx/sse2. But by saying we need secondary memory we discourage the
29994 register allocator from using the mmx registers unless needed. */
29995 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
29998 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
30000 /* SSE1 doesn't have any direct moves from other classes. */
30004 /* If the target says that inter-unit moves are more expensive
30005 than moving through memory, then don't generate them. */
30006 if (!TARGET_INTER_UNIT_MOVES)
30009 /* Between SSE and general, we have moves no larger than word size. */
30010 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
30018 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
30019 enum machine_mode mode, int strict)
30021 return inline_secondary_memory_needed (class1, class2, mode, strict);
30024 /* Implement the TARGET_CLASS_MAX_NREGS hook.
30026 On the 80386, this is the size of MODE in words,
30027 except in the FP regs, where a single reg is always enough. */
30029 static unsigned char
30030 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
30032 if (MAYBE_INTEGER_CLASS_P (rclass))
30034 if (mode == XFmode)
30035 return (TARGET_64BIT ? 2 : 3);
30036 else if (mode == XCmode)
30037 return (TARGET_64BIT ? 4 : 6);
30039 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
30043 if (COMPLEX_MODE_P (mode))
30050 /* Return true if the registers in CLASS cannot represent the change from
30051 modes FROM to TO. */
30054 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
30055 enum reg_class regclass)
30060 /* x87 registers can't do subreg at all, as all values are reformatted
30061 to extended precision. */
30062 if (MAYBE_FLOAT_CLASS_P (regclass))
30065 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
30067 /* Vector registers do not support QI or HImode loads. If we don't
30068 disallow a change to these modes, reload will assume it's ok to
30069 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
30070 the vec_dupv4hi pattern. */
30071 if (GET_MODE_SIZE (from) < 4)
30074 /* Vector registers do not support subreg with nonzero offsets, which
30075 are otherwise valid for integer registers. Since we can't see
30076 whether we have a nonzero offset from here, prohibit all
30077 nonparadoxical subregs changing size. */
30078 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
30085 /* Return the cost of moving data of mode M between a
30086 register and memory. A value of 2 is the default; this cost is
30087 relative to those in `REGISTER_MOVE_COST'.
30089 This function is used extensively by register_move_cost, which is used to
30090 build tables at startup, so make it inline here.
30091 When IN is 2, return the maximum of the in and out move costs.
30093 If moving between registers and memory is more expensive than
30094 between two registers, you should define this macro to express the
30095 relative cost.
30097 Also model the increased cost of moving QImode registers in non
30098 Q_REGS classes. */
30100 static inline int
30101 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
30105 if (FLOAT_CLASS_P (regclass))
30123 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
30124 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
30126 if (SSE_CLASS_P (regclass))
30129 switch (GET_MODE_SIZE (mode))
30144 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
30145 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
30147 if (MMX_CLASS_P (regclass))
30150 switch (GET_MODE_SIZE (mode))
30162 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
30163 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
30165 switch (GET_MODE_SIZE (mode))
30168 if (Q_CLASS_P (regclass) || TARGET_64BIT)
30171 return ix86_cost->int_store[0];
30172 if (TARGET_PARTIAL_REG_DEPENDENCY
30173 && optimize_function_for_speed_p (cfun))
30174 cost = ix86_cost->movzbl_load;
30176 cost = ix86_cost->int_load[0];
30178 return MAX (cost, ix86_cost->int_store[0]);
30184 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
30186 return ix86_cost->movzbl_load;
30188 return ix86_cost->int_store[0] + 4;
30193 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
30194 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
30196 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
30197 if (mode == TFmode)
30200 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
30202 cost = ix86_cost->int_load[2];
30204 cost = ix86_cost->int_store[2];
30205 return (cost * (((int) GET_MODE_SIZE (mode)
30206 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
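/* Editorial example (added commentary): on a 32-bit target a TFmode
   value is moved as XFmode (12 bytes, i.e. three SImode words), so
   the code above returns three times the 32-bit load/store cost.  */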
30211 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
30214 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
30218 /* Return the cost of moving data from a register in class CLASS1 to
30219 one in class CLASS2.
30221 It is not required that the cost always equal 2 when FROM is the same as TO;
30222 on some machines it is expensive to move between registers if they are not
30223 general registers. */
30226 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
30227 reg_class_t class2_i)
30229 enum reg_class class1 = (enum reg_class) class1_i;
30230 enum reg_class class2 = (enum reg_class) class2_i;
30232 /* In case we require secondary memory, compute the cost of the store
30233 followed by the load. To avoid bad register allocation choices, we need
30234 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
30236 if (inline_secondary_memory_needed (class1, class2, mode, 0))
30240 cost += inline_memory_move_cost (mode, class1, 2);
30241 cost += inline_memory_move_cost (mode, class2, 2);
30243 /* When copying from a general purpose register we may emit multiple
30244 stores followed by a single load, causing a memory size mismatch stall.
30245 Count this as an arbitrarily high cost of 20. */
30246 if (targetm.class_max_nregs (class1, mode)
30247 > targetm.class_max_nregs (class2, mode))
30250 /* In the case of FP/MMX moves, the registers actually overlap, and we
30251 have to switch modes in order to treat them differently. */
30252 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
30253 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
30259 /* Moves between SSE/MMX and integer unit are expensive. */
30260 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
30261 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
30263 /* ??? By keeping the returned value relatively high, we limit the number
30264 of moves between integer and MMX/SSE registers for all targets.
30265 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
30266 where integer modes in MMX/SSE registers are not tieable
30267 because of missing QImode and HImode moves to, from or between
30268 MMX/SSE registers. */
30269 return MAX (8, ix86_cost->mmxsse_to_integer);
30271 if (MAYBE_FLOAT_CLASS_P (class1))
30272 return ix86_cost->fp_move;
30273 if (MAYBE_SSE_CLASS_P (class1))
30274 return ix86_cost->sse_move;
30275 if (MAYBE_MMX_CLASS_P (class1))
30276 return ix86_cost->mmx_move;
30280 /* Return TRUE if hard register REGNO can hold a value of machine-mode
30281 MODE. */
30283 bool
30284 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
30286 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
30287 if (CC_REGNO_P (regno))
30288 return GET_MODE_CLASS (mode) == MODE_CC;
30289 if (GET_MODE_CLASS (mode) == MODE_CC
30290 || GET_MODE_CLASS (mode) == MODE_RANDOM
30291 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
30293 if (FP_REGNO_P (regno))
30294 return VALID_FP_MODE_P (mode);
30295 if (SSE_REGNO_P (regno))
30297 /* We implement the move patterns for all vector modes into and
30298 out of SSE registers, even when no operation instructions
30299 are available. OImode move is available only when AVX is
30301 return ((TARGET_AVX && mode == OImode)
30302 || VALID_AVX256_REG_MODE (mode)
30303 || VALID_SSE_REG_MODE (mode)
30304 || VALID_SSE2_REG_MODE (mode)
30305 || VALID_MMX_REG_MODE (mode)
30306 || VALID_MMX_REG_MODE_3DNOW (mode));
30308 if (MMX_REGNO_P (regno))
30310 /* We implement the move patterns for 3DNOW modes even in MMX mode,
30311 so if the register is available at all, then we can move data of
30312 the given mode into or out of it. */
30313 return (VALID_MMX_REG_MODE (mode)
30314 || VALID_MMX_REG_MODE_3DNOW (mode));
30317 if (mode == QImode)
30319 /* Take care with QImode values: they can live in non-QI regs,
30320 but then they cause partial register stalls. */
30321 if (regno <= BX_REG || TARGET_64BIT)
30323 if (!TARGET_PARTIAL_REG_STALL)
30325 return !can_create_pseudo_p ();
30327 /* We handle both integer and floats in the general purpose registers. */
30328 else if (VALID_INT_MODE_P (mode))
30330 else if (VALID_FP_MODE_P (mode))
30332 else if (VALID_DFP_MODE_P (mode))
30334 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
30335 on to use that value in smaller contexts, this can easily force a
30336 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
30337 supporting DImode, allow it. */
30338 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
30344 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
30345 tieable integer mode. */
30348 ix86_tieable_integer_mode_p (enum machine_mode mode)
30357 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
30360 return TARGET_64BIT;
30367 /* Return true if MODE1 is accessible in a register that can hold MODE2
30368 without copying. That is, all register classes that can hold MODE2
30369 can also hold MODE1. */
30372 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
30374 if (mode1 == mode2)
30377 if (ix86_tieable_integer_mode_p (mode1)
30378 && ix86_tieable_integer_mode_p (mode2))
30381 /* MODE2 being XFmode implies fp stack or general regs, which means we
30382 can tie any smaller floating point modes to it. Note that we do not
30383 tie this with TFmode. */
30384 if (mode2 == XFmode)
30385 return mode1 == SFmode || mode1 == DFmode;
30387 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
30388 that we can tie it with SFmode. */
30389 if (mode2 == DFmode)
30390 return mode1 == SFmode;
30392 /* If MODE2 is only appropriate for an SSE register, then tie with
30393 any other mode acceptable to SSE registers. */
30394 if (GET_MODE_SIZE (mode2) == 16
30395 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
30396 return (GET_MODE_SIZE (mode1) == 16
30397 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
30399 /* If MODE2 is appropriate for an MMX register, then tie
30400 with any other mode acceptable to MMX registers. */
30401 if (GET_MODE_SIZE (mode2) == 8
30402 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
30403 return (GET_MODE_SIZE (mode1) == 8
30404 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
30409 /* Compute a (partial) cost for rtx X. Return true if the complete
30410 cost has been computed, and false if subexpressions should be
30411 scanned. In either case, *TOTAL contains the cost result. */
30414 ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
30417 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
30418 enum machine_mode mode = GET_MODE (x);
30419 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
30427 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
30429 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
30431 else if (flag_pic && SYMBOLIC_CONST (x)
30433 || (GET_CODE (x) != LABEL_REF
30434 && (GET_CODE (x) != SYMBOL_REF
30435 || !SYMBOL_REF_LOCAL_P (x)))))
30442 if (mode == VOIDmode)
30445 switch (standard_80387_constant_p (x))
30450 default: /* Other constants */
30455 /* Start with (MEM (SYMBOL_REF)), since that's where
30456 it'll probably end up. Add a penalty for size. */
30457 *total = (COSTS_N_INSNS (1)
30458 + (flag_pic != 0 && !TARGET_64BIT)
30459 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
30465 /* The zero extension is often completely free on x86_64, so make
30466 it as cheap as possible. */
30467 if (TARGET_64BIT && mode == DImode
30468 && GET_MODE (XEXP (x, 0)) == SImode)
30470 else if (TARGET_ZERO_EXTEND_WITH_AND)
30471 *total = cost->add;
30473 *total = cost->movzx;
30477 *total = cost->movsx;
30481 if (CONST_INT_P (XEXP (x, 1))
30482 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
30484 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
30487 *total = cost->add;
30490 if ((value == 2 || value == 3)
30491 && cost->lea <= cost->shift_const)
30493 *total = cost->lea;
30503 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
30505 if (CONST_INT_P (XEXP (x, 1)))
30507 if (INTVAL (XEXP (x, 1)) > 32)
30508 *total = cost->shift_const + COSTS_N_INSNS (2);
30510 *total = cost->shift_const * 2;
30514 if (GET_CODE (XEXP (x, 1)) == AND)
30515 *total = cost->shift_var * 2;
30517 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
30522 if (CONST_INT_P (XEXP (x, 1)))
30523 *total = cost->shift_const;
30525 *total = cost->shift_var;
30533 gcc_assert (FLOAT_MODE_P (mode));
30534 gcc_assert (TARGET_FMA || TARGET_FMA4);
30536 /* ??? SSE scalar/vector cost should be used here. */
30537 /* ??? Bald assumption that fma has the same cost as fmul. */
30538 *total = cost->fmul;
30539 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
30541 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
30543 if (GET_CODE (sub) == NEG)
30544 sub = XEXP (sub, 0);
30545 *total += rtx_cost (sub, FMA, 0, speed);
30548 if (GET_CODE (sub) == NEG)
30549 sub = XEXP (sub, 0);
30550 *total += rtx_cost (sub, FMA, 2, speed);
30555 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30557 /* ??? SSE scalar cost should be used here. */
30558 *total = cost->fmul;
30561 else if (X87_FLOAT_MODE_P (mode))
30563 *total = cost->fmul;
30566 else if (FLOAT_MODE_P (mode))
30568 /* ??? SSE vector cost should be used here. */
30569 *total = cost->fmul;
30574 rtx op0 = XEXP (x, 0);
30575 rtx op1 = XEXP (x, 1);
30577 if (CONST_INT_P (XEXP (x, 1)))
30579 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
30580 for (nbits = 0; value != 0; value &= value - 1)
30584 /* This is arbitrary. */
30587 /* Compute costs correctly for widening multiplication. */
30588 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
30589 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
30590 == GET_MODE_SIZE (mode))
30592 int is_mulwiden = 0;
30593 enum machine_mode inner_mode = GET_MODE (op0);
30595 if (GET_CODE (op0) == GET_CODE (op1))
30596 is_mulwiden = 1, op1 = XEXP (op1, 0);
30597 else if (CONST_INT_P (op1))
30599 if (GET_CODE (op0) == SIGN_EXTEND)
30600 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
30603 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
30607 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
30610 *total = (cost->mult_init[MODE_INDEX (mode)]
30611 + nbits * cost->mult_bit
30612 + rtx_cost (op0, outer_code, opno, speed)
30613 + rtx_cost (op1, outer_code, opno, speed));
30622 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30623 /* ??? SSE cost should be used here. */
30624 *total = cost->fdiv;
30625 else if (X87_FLOAT_MODE_P (mode))
30626 *total = cost->fdiv;
30627 else if (FLOAT_MODE_P (mode))
30628 /* ??? SSE vector cost should be used here. */
30629 *total = cost->fdiv;
30631 *total = cost->divide[MODE_INDEX (mode)];
30635 if (GET_MODE_CLASS (mode) == MODE_INT
30636 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
30638 if (GET_CODE (XEXP (x, 0)) == PLUS
30639 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
30640 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
30641 && CONSTANT_P (XEXP (x, 1)))
30643 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
30644 if (val == 2 || val == 4 || val == 8)
30646 *total = cost->lea;
30647 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
30648 outer_code, opno, speed);
30649 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
30650 outer_code, opno, speed);
30651 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
30655 else if (GET_CODE (XEXP (x, 0)) == MULT
30656 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
30658 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
30659 if (val == 2 || val == 4 || val == 8)
30661 *total = cost->lea;
30662 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
30663 outer_code, opno, speed);
30664 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
30668 else if (GET_CODE (XEXP (x, 0)) == PLUS)
30670 *total = cost->lea;
30671 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
30672 outer_code, opno, speed);
30673 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
30674 outer_code, opno, speed);
30675 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
30682 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30684 /* ??? SSE cost should be used here. */
30685 *total = cost->fadd;
30688 else if (X87_FLOAT_MODE_P (mode))
30690 *total = cost->fadd;
30693 else if (FLOAT_MODE_P (mode))
30695 /* ??? SSE vector cost should be used here. */
30696 *total = cost->fadd;
30704 if (!TARGET_64BIT && mode == DImode)
30706 *total = (cost->add * 2
30707 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
30708 << (GET_MODE (XEXP (x, 0)) != DImode))
30709 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
30710 << (GET_MODE (XEXP (x, 1)) != DImode)));
30716 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30718 /* ??? SSE cost should be used here. */
30719 *total = cost->fchs;
30722 else if (X87_FLOAT_MODE_P (mode))
30724 *total = cost->fchs;
30727 else if (FLOAT_MODE_P (mode))
30729 /* ??? SSE vector cost should be used here. */
30730 *total = cost->fchs;
30736 if (!TARGET_64BIT && mode == DImode)
30737 *total = cost->add * 2;
30739 *total = cost->add;
30743 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
30744 && XEXP (XEXP (x, 0), 1) == const1_rtx
30745 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
30746 && XEXP (x, 1) == const0_rtx)
30748 /* This kind of construct is implemented using test[bwl].
30749 Treat it as if we had an AND. */
30750 *total = (cost->add
30751 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
30752 + rtx_cost (const1_rtx, outer_code, opno, speed));
30758 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
30763 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30764 /* ??? SSE cost should be used here. */
30765 *total = cost->fabs;
30766 else if (X87_FLOAT_MODE_P (mode))
30767 *total = cost->fabs;
30768 else if (FLOAT_MODE_P (mode))
30769 /* ??? SSE vector cost should be used here. */
30770 *total = cost->fabs;
30774 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30775 /* ??? SSE cost should be used here. */
30776 *total = cost->fsqrt;
30777 else if (X87_FLOAT_MODE_P (mode))
30778 *total = cost->fsqrt;
30779 else if (FLOAT_MODE_P (mode))
30780 /* ??? SSE vector cost should be used here. */
30781 *total = cost->fsqrt;
30785 if (XINT (x, 1) == UNSPEC_TP)
30792 case VEC_DUPLICATE:
30793 /* ??? Assume all of these vector manipulation patterns are
30794 recognizable, in which case they all pretty much have the
30795 same cost. */
30796 *total = COSTS_N_INSNS (1);
30806 static int current_machopic_label_num;
30808 /* Given a symbol name and its associated stub, write out the
30809 definition of the stub. */
30812 machopic_output_stub (FILE *file, const char *symb, const char *stub)
30814 unsigned int length;
30815 char *binder_name, *symbol_name, lazy_ptr_name[32];
30816 int label = ++current_machopic_label_num;
30818 /* For 64-bit we shouldn't get here. */
30819 gcc_assert (!TARGET_64BIT);
30821 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
30822 symb = targetm.strip_name_encoding (symb);
30824 length = strlen (stub);
30825 binder_name = XALLOCAVEC (char, length + 32);
30826 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
30828 length = strlen (symb);
30829 symbol_name = XALLOCAVEC (char, length + 32);
30830 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
30832 sprintf (lazy_ptr_name, "L%d$lz", label);
30834 if (MACHOPIC_ATT_STUB)
30835 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
30836 else if (MACHOPIC_PURE)
30837 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
30839 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
30841 fprintf (file, "%s:\n", stub);
30842 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
30844 if (MACHOPIC_ATT_STUB)
30846 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
30848 else if (MACHOPIC_PURE)
30851 /* 25-byte PIC stub using "CALL get_pc_thunk". */
30852 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
30853 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
30854 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
30855 label, lazy_ptr_name, label);
30856 fprintf (file, "\tjmp\t*%%ecx\n");
30859 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
30861 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
30862 it needs no stub-binding-helper. */
30863 if (MACHOPIC_ATT_STUB)
30866 fprintf (file, "%s:\n", binder_name);
30870 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
30871 fprintf (file, "\tpushl\t%%ecx\n");
30874 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
30876 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
30878 /* N.B. Keep the correspondence of these
30879 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
30880 old-pic/new-pic/non-pic stubs; altering this will break
30881 compatibility with existing dylibs. */
30884 /* 25-byte PIC stub using "CALL get_pc_thunk". */
30885 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
30888 /* 16-byte -mdynamic-no-pic stub. */
30889 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
30891 fprintf (file, "%s:\n", lazy_ptr_name);
30892 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
30893 fprintf (file, ASM_LONG "%s\n", binder_name);
30895 #endif /* TARGET_MACHO */
30897 /* Order the registers for the register allocator. */
30900 x86_order_regs_for_local_alloc (void)
30905 /* First allocate the local general purpose registers. */
30906 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
30907 if (GENERAL_REGNO_P (i) && call_used_regs[i])
30908 reg_alloc_order [pos++] = i;
30910 /* Global general purpose registers. */
30911 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
30912 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
30913 reg_alloc_order [pos++] = i;
30915 /* x87 registers come first in case we are doing FP math
30916 using them. */
30917 if (!TARGET_SSE_MATH)
30918 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
30919 reg_alloc_order [pos++] = i;
30921 /* SSE registers. */
30922 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
30923 reg_alloc_order [pos++] = i;
30924 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
30925 reg_alloc_order [pos++] = i;
30927 /* x87 registers. */
30928 if (TARGET_SSE_MATH)
30929 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
30930 reg_alloc_order [pos++] = i;
30932 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
30933 reg_alloc_order [pos++] = i;
30935 /* Initialize the rest of the array, as we do not allocate some registers
30936 at all. */
30937 while (pos < FIRST_PSEUDO_REGISTER)
30938 reg_alloc_order [pos++] = 0;
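/* Editorial note (added commentary): the resulting order roughly
   prefers call-clobbered general registers, then call-saved ones,
   with the x87 stack placed before the SSE registers only when SSE
   math is not in use.  */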
30941 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
30942 in struct attribute_spec.handler. */
30944 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
30945 tree args,
30946 int flags ATTRIBUTE_UNUSED,
30947 bool *no_add_attrs)
30949 if (TREE_CODE (*node) != FUNCTION_TYPE
30950 && TREE_CODE (*node) != METHOD_TYPE
30951 && TREE_CODE (*node) != FIELD_DECL
30952 && TREE_CODE (*node) != TYPE_DECL)
30954 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30956 *no_add_attrs = true;
30961 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
30963 *no_add_attrs = true;
30966 if (is_attribute_p ("callee_pop_aggregate_return", name))
30970 cst = TREE_VALUE (args);
30971 if (TREE_CODE (cst) != INTEGER_CST)
30973 warning (OPT_Wattributes,
30974 "%qE attribute requires an integer constant argument",
30976 *no_add_attrs = true;
30978 else if (compare_tree_int (cst, 0) != 0
30979 && compare_tree_int (cst, 1) != 0)
30981 warning (OPT_Wattributes,
30982 "argument to %qE attribute is neither zero, nor one",
30984 *no_add_attrs = true;
30993 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
30994 struct attribute_spec.handler. */
30996 ix86_handle_abi_attribute (tree *node, tree name,
30997 tree args ATTRIBUTE_UNUSED,
30998 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
31000 if (TREE_CODE (*node) != FUNCTION_TYPE
31001 && TREE_CODE (*node) != METHOD_TYPE
31002 && TREE_CODE (*node) != FIELD_DECL
31003 && TREE_CODE (*node) != TYPE_DECL)
31005 warning (OPT_Wattributes, "%qE attribute only applies to functions",
31007 *no_add_attrs = true;
31011 /* The ms_abi and sysv_abi attributes are mutually exclusive.  */
31012 if (is_attribute_p ("ms_abi", name))
31014 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
31016 error ("ms_abi and sysv_abi attributes are not compatible");
31021 else if (is_attribute_p ("sysv_abi", name))
31023 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
31025 error ("ms_abi and sysv_abi attributes are not compatible");
31034 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
31035 struct attribute_spec.handler. */
31037 ix86_handle_struct_attribute (tree *node, tree name,
31038 tree args ATTRIBUTE_UNUSED,
31039 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
31042 if (DECL_P (*node))
31044 if (TREE_CODE (*node) == TYPE_DECL)
31045 type = &TREE_TYPE (*node);
31050 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
31051 || TREE_CODE (*type) == UNION_TYPE)))
31053 warning (OPT_Wattributes, "%qE attribute ignored",
31055 *no_add_attrs = true;
31058 else if ((is_attribute_p ("ms_struct", name)
31059 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
31060 || ((is_attribute_p ("gcc_struct", name)
31061 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
31063 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
31065 *no_add_attrs = true;
31072 ix86_handle_fndecl_attribute (tree *node, tree name,
31073 tree args ATTRIBUTE_UNUSED,
31074 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
31076 if (TREE_CODE (*node) != FUNCTION_DECL)
31078 warning (OPT_Wattributes, "%qE attribute only applies to functions",
31080 *no_add_attrs = true;
31086 ix86_ms_bitfield_layout_p (const_tree record_type)
31088 return ((TARGET_MS_BITFIELD_LAYOUT
31089 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
31090 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
31093 /* Returns an expression indicating where the this parameter is
31094 located on entry to the FUNCTION. */
31097 x86_this_parameter (tree function)
31099 tree type = TREE_TYPE (function);
31100 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
31105 const int *parm_regs;
31107 if (ix86_function_type_abi (type) == MS_ABI)
31108 parm_regs = x86_64_ms_abi_int_parameter_registers;
31110 parm_regs = x86_64_int_parameter_registers;
31111 return gen_rtx_REG (DImode, parm_regs[aggr]);
31114 nregs = ix86_function_regparm (type, function);
31116 if (nregs > 0 && !stdarg_p (type))
31119 unsigned int ccvt = ix86_get_callcvt (type);
31121 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
31122 regno = aggr ? DX_REG : CX_REG;
31123 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
31127 return gen_rtx_MEM (SImode,
31128 plus_constant (stack_pointer_rtx, 4));
31137 return gen_rtx_MEM (SImode,
31138 plus_constant (stack_pointer_rtx, 4));
31141 return gen_rtx_REG (SImode, regno);
31144 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
31147 /* Determine whether x86_output_mi_thunk can succeed. */
31150 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
31151 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
31152 HOST_WIDE_INT vcall_offset, const_tree function)
31154 /* 64-bit can handle anything. */
31158 /* For 32-bit, everything's fine if we have one free register. */
31159 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
31162 /* Need a free register for vcall_offset. */
31166 /* Need a free register for GOT references. */
31167 if (flag_pic && !targetm.binds_local_p (function))
31170 /* Otherwise ok. */
31174 /* Output the assembler code for a thunk function. THUNK_DECL is the
31175 declaration for the thunk function itself, FUNCTION is the decl for
31176 the target function. DELTA is an immediate constant offset to be
31177 added to THIS. If VCALL_OFFSET is nonzero, the word at
31178 *(*this + vcall_offset) should be added to THIS. */
31181 x86_output_mi_thunk (FILE *file,
31182 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
31183 HOST_WIDE_INT vcall_offset, tree function)
31185 rtx this_param = x86_this_parameter (function);
31186 rtx this_reg, tmp, fnaddr;
31188 emit_note (NOTE_INSN_PROLOGUE_END);
31190 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
31191 pull it in now and let DELTA benefit. */
31192 if (REG_P (this_param))
31193 this_reg = this_param;
31194 else if (vcall_offset)
31196 /* Put the this parameter into %eax. */
31197 this_reg = gen_rtx_REG (Pmode, AX_REG);
31198 emit_move_insn (this_reg, this_param);
31201 this_reg = NULL_RTX;
31203 /* Adjust the this parameter by a fixed constant. */
31206 rtx delta_rtx = GEN_INT (delta);
31207 rtx delta_dst = this_reg ? this_reg : this_param;
31211 if (!x86_64_general_operand (delta_rtx, Pmode))
31213 tmp = gen_rtx_REG (Pmode, R10_REG);
31214 emit_move_insn (tmp, delta_rtx);
31219 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
31222 /* Adjust the this parameter by a value stored in the vtable. */
31225 rtx vcall_addr, vcall_mem, this_mem;
31226 unsigned int tmp_regno;
31229 tmp_regno = R10_REG;
31232 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
31233 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
31234 tmp_regno = AX_REG;
31236 tmp_regno = CX_REG;
31238 tmp = gen_rtx_REG (Pmode, tmp_regno);
31240 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
31241 if (Pmode != ptr_mode)
31242 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
31243 emit_move_insn (tmp, this_mem);
31245 /* Adjust the this parameter. */
31246 vcall_addr = plus_constant (tmp, vcall_offset);
31248 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
31250 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
31251 emit_move_insn (tmp2, GEN_INT (vcall_offset));
31252 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
31255 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
31256 if (Pmode != ptr_mode)
31257 emit_insn (gen_addsi_1_zext (this_reg,
31258 gen_rtx_REG (ptr_mode,
31262 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
31265 /* If necessary, drop THIS back to its stack slot. */
31266 if (this_reg && this_reg != this_param)
31267 emit_move_insn (this_param, this_reg);
31269 fnaddr = XEXP (DECL_RTL (function), 0);
31272 if (!flag_pic || targetm.binds_local_p (function)
31273 || cfun->machine->call_abi == MS_ABI)
31277 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
31278 tmp = gen_rtx_CONST (Pmode, tmp);
31279 fnaddr = gen_rtx_MEM (Pmode, tmp);
31284 if (!flag_pic || targetm.binds_local_p (function))
31287 else if (TARGET_MACHO)
31289 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
31290 fnaddr = XEXP (fnaddr, 0);
31292 #endif /* TARGET_MACHO */
31295 tmp = gen_rtx_REG (Pmode, CX_REG);
31296 output_set_got (tmp, NULL_RTX);
31298 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
31299 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
31300 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
31304 /* Our sibling call patterns do not allow memories, because we have no
31305 predicate that can distinguish between frame and non-frame memory.
31306 For our purposes here, we can get away with (ab)using a jump pattern,
31307 because we're going to do no optimization. */
31308 if (MEM_P (fnaddr))
31309 emit_jump_insn (gen_indirect_jump (fnaddr));
31312 tmp = gen_rtx_MEM (QImode, fnaddr);
31313 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
31314 tmp = emit_call_insn (tmp);
31315 SIBLING_CALL_P (tmp) = 1;
31319 /* Emit just enough of rest_of_compilation to get the insns emitted.
31320 Note that use_thunk calls assemble_start_function et al. */
31321 tmp = get_insns ();
31322 insn_locators_alloc ();
31323 shorten_branches (tmp);
31324 final_start_function (tmp, file, 1);
31325 final (tmp, file, 1);
31326 final_end_function ();
31330 x86_file_start (void)
31332 default_file_start ();
31334 darwin_file_start ();
31336 if (X86_FILE_START_VERSION_DIRECTIVE)
31337 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
31338 if (X86_FILE_START_FLTUSED)
31339 fputs ("\t.global\t__fltused\n", asm_out_file);
31340 if (ix86_asm_dialect == ASM_INTEL)
31341 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
31345 x86_field_alignment (tree field, int computed)
31347 enum machine_mode mode;
31348 tree type = TREE_TYPE (field);
31350 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
31352 mode = TYPE_MODE (strip_array_types (type));
31353 if (mode == DFmode || mode == DCmode
31354 || GET_MODE_CLASS (mode) == MODE_INT
31355 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
31356 return MIN (32, computed);
31360 /* Output assembler code to FILE to increment profiler label # LABELNO
31361 for profiling a function entry. */
31363 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
31365 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
31370 #ifndef NO_PROFILE_COUNTERS
31371 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
31374 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
31375 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
31377 fprintf (file, "\tcall\t%s\n", mcount_name);
31381 #ifndef NO_PROFILE_COUNTERS
31382 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
31385 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
31389 #ifndef NO_PROFILE_COUNTERS
31390 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
31393 fprintf (file, "\tcall\t%s\n", mcount_name);
31397 /* We don't have exact information about the insn sizes, but we may assume
31398 quite safely that we are informed about all 1 byte insns and memory
31399 address sizes. This is enough to eliminate unnecessary padding in
31400 99% of cases.  */
31403 min_insn_size (rtx insn)
31407 if (!INSN_P (insn) || !active_insn_p (insn))
31410 /* Discard alignments we've emitted, and jump instructions.  */
31411 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
31412 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
31414 if (JUMP_TABLE_DATA_P (insn))
31417 /* Important case - calls are always 5 bytes.
31418 It is common to have many calls in a row.  */
31419 if (CALL_P (insn)
31420 && symbolic_reference_mentioned_p (PATTERN (insn))
31421 && !SIBLING_CALL_P (insn))
31423 len = get_attr_length (insn);
31427 /* For normal instructions we rely on get_attr_length being exact,
31428 with a few exceptions. */
31429 if (!JUMP_P (insn))
31431 enum attr_type type = get_attr_type (insn);
31436 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
31437 || asm_noperands (PATTERN (insn)) >= 0)
31444 /* Otherwise trust get_attr_length. */
31448 l = get_attr_length_address (insn);
31449 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
31458 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
31460 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
31461 window.  */
31464 ix86_avoid_jump_mispredicts (void)
31466 rtx insn, start = get_insns ();
31467 int nbytes = 0, njumps = 0;
31470 /* Look for all minimal intervals of instructions containing 4 jumps.
31471 The intervals are bounded by START and INSN. NBYTES is the total
31472 size of the instructions in the interval, including INSN but not
31473 including START. When NBYTES is smaller than 16 bytes, it is possible
31474 that the end of START and INSN end up in the same 16 byte window.
31476 The smallest window offset at which INSN can start is the case where
31477 START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
31478 We add a p2align to the 16 byte window with maxskip 15 - NBYTES + sizeof (INSN).
31480 for (insn = start; insn; insn = NEXT_INSN (insn))
31484 if (LABEL_P (insn))
31486 int align = label_to_alignment (insn);
31487 int max_skip = label_to_max_skip (insn);
31491 /* If align > 3, only up to 16 - max_skip - 1 bytes can already be
31492 in the current 16 byte page, because otherwise
31493 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
31494 bytes to reach a 16 byte boundary.  */
31496 || (align <= 3 && max_skip != (1 << align) - 1))
31499 fprintf (dump_file, "Label %i with max_skip %i\n",
31500 INSN_UID (insn), max_skip);
31503 while (nbytes + max_skip >= 16)
31505 start = NEXT_INSN (start);
31506 if ((JUMP_P (start)
31507 && GET_CODE (PATTERN (start)) != ADDR_VEC
31508 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
31510 njumps--, isjump = 1;
31513 nbytes -= min_insn_size (start);
31519 min_size = min_insn_size (insn);
31520 nbytes += min_size;
31522 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
31523 INSN_UID (insn), min_size);
31525 && GET_CODE (PATTERN (insn)) != ADDR_VEC
31526 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
31534 start = NEXT_INSN (start);
31535 if ((JUMP_P (start)
31536 && GET_CODE (PATTERN (start)) != ADDR_VEC
31537 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
31539 njumps--, isjump = 1;
31542 nbytes -= min_insn_size (start);
31544 gcc_assert (njumps >= 0);
31546 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
31547 INSN_UID (start), INSN_UID (insn), nbytes);
31549 if (njumps == 3 && isjump && nbytes < 16)
31551 int padsize = 15 - nbytes + min_insn_size (insn);
31554 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
31555 INSN_UID (insn), padsize);
31556 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
31562 /* AMD Athlon works faster
31563 when RET is not the destination of a conditional jump and is not directly
31564 preceded by another jump instruction. We avoid the penalty by inserting a
31565 NOP just before the RET instructions in such cases.  */
31567 ix86_pad_returns (void)
31572 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
31574 basic_block bb = e->src;
31575 rtx ret = BB_END (bb);
31577 bool replace = false;
31579 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
31580 || optimize_bb_for_size_p (bb))
31582 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
31583 if (active_insn_p (prev) || LABEL_P (prev))
31585 if (prev && LABEL_P (prev))
31590 FOR_EACH_EDGE (e, ei, bb->preds)
31591 if (EDGE_FREQUENCY (e) && e->src->index >= 0
31592 && !(e->flags & EDGE_FALLTHRU))
31597 prev = prev_active_insn (ret);
31599 && ((JUMP_P (prev) && any_condjump_p (prev))
31602 /* Empty functions get a branch mispredict even when
31603 the jump destination is not visible to us.  */
31604 if (!prev && !optimize_function_for_size_p (cfun))
31609 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
31615 /* Count the minimum number of instructions in BB. Return 4 if the
31616 number of instructions >= 4. */
31619 ix86_count_insn_bb (basic_block bb)
31622 int insn_count = 0;
31624 /* Count number of instructions in this block. Return 4 if the number
31625 of instructions >= 4. */
31626 FOR_BB_INSNS (bb, insn)
31628 /* Returns only happen in exit blocks.  */
31630 && ANY_RETURN_P (PATTERN (insn)))
31633 if (NONDEBUG_INSN_P (insn)
31634 && GET_CODE (PATTERN (insn)) != USE
31635 && GET_CODE (PATTERN (insn)) != CLOBBER)
31638 if (insn_count >= 4)
31647 /* Count the minimum number of instructions in code path in BB.
31648 Return 4 if the number of instructions >= 4. */
31651 ix86_count_insn (basic_block bb)
31655 int min_prev_count;
31657 /* Only bother counting instructions along paths with no
31658 more than 2 basic blocks between entry and exit. Given
31659 that BB has an edge to exit, determine if a predecessor
31660 of BB has an edge from entry. If so, compute the number
31661 of instructions in the predecessor block. If there
31662 happen to be multiple such blocks, compute the minimum. */
31663 min_prev_count = 4;
31664 FOR_EACH_EDGE (e, ei, bb->preds)
31667 edge_iterator prev_ei;
31669 if (e->src == ENTRY_BLOCK_PTR)
31671 min_prev_count = 0;
31674 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
31676 if (prev_e->src == ENTRY_BLOCK_PTR)
31678 int count = ix86_count_insn_bb (e->src);
31679 if (count < min_prev_count)
31680 min_prev_count = count;
31686 if (min_prev_count < 4)
31687 min_prev_count += ix86_count_insn_bb (bb);
31689 return min_prev_count;
31692 /* Pad short function to 4 instructions.  */
31695 ix86_pad_short_function (void)
31700 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
31702 rtx ret = BB_END (e->src);
31703 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
31705 int insn_count = ix86_count_insn (e->src);
31707 /* Pad short function. */
31708 if (insn_count < 4)
31712 /* Find epilogue. */
31715 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
31716 insn = PREV_INSN (insn);
31721 /* Two NOPs count as one instruction. */
31722 insn_count = 2 * (4 - insn_count);
31723 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
31729 /* Implement machine specific optimizations. We implement padding of returns
31730 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
31734 /* We are freeing block_for_insn in the toplev to keep compatibility
31735 with old MDEP_REORGS that are not CFG based. Recompute it now. */
31736 compute_bb_for_insn ();
31738 /* Run the vzeroupper optimization if needed. */
31739 if (TARGET_VZEROUPPER)
31740 move_or_delete_vzeroupper ();
31742 if (optimize && optimize_function_for_speed_p (cfun))
31744 if (TARGET_PAD_SHORT_FUNCTION)
31745 ix86_pad_short_function ();
31746 else if (TARGET_PAD_RETURNS)
31747 ix86_pad_returns ();
31748 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
31749 if (TARGET_FOUR_JUMP_LIMIT)
31750 ix86_avoid_jump_mispredicts ();
31755 /* Return nonzero when a QImode register that must be represented via
31756 a REX prefix is used.  */
31758 x86_extended_QIreg_mentioned_p (rtx insn)
31761 extract_insn_cached (insn);
31762 for (i = 0; i < recog_data.n_operands; i++)
31763 if (REG_P (recog_data.operand[i])
31764 && REGNO (recog_data.operand[i]) > BX_REG)
31769 /* Return nonzero when P points to a register encoded via a REX prefix.
31770 Called via for_each_rtx. */
31772 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
31774 unsigned int regno;
31777 regno = REGNO (*p);
31778 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
31781 /* Return true when INSN mentions a register that must be encoded using
31782 a REX prefix.  */
31784 x86_extended_reg_mentioned_p (rtx insn)
31786 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
31787 extended_reg_mentioned_1, NULL);
31790 /* If profitable, negate (without causing overflow) integer constant
31791 of mode MODE at location LOC. Return true in this case. */
31793 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
31797 if (!CONST_INT_P (*loc))
31803 /* DImode x86_64 constants must fit in 32 bits. */
31804 gcc_assert (x86_64_immediate_operand (*loc, mode));
31815 gcc_unreachable ();
31818 /* Avoid overflows. */
31819 if (mode_signbit_p (mode, *loc))
31822 val = INTVAL (*loc);
31824 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
31825 Exception: -128 encodes smaller than 128, so swap the sign and the operation.  */
31826 if ((val < 0 && val != -128)
31829 *loc = GEN_INT (-val);
31836 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
31837 optabs would emit if we didn't have TFmode patterns. */
31840 x86_emit_floatuns (rtx operands[2])
31842 rtx neglab, donelab, i0, i1, f0, in, out;
31843 enum machine_mode mode, inmode;
31845 inmode = GET_MODE (operands[1]);
31846 gcc_assert (inmode == SImode || inmode == DImode);
31849 in = force_reg (inmode, operands[1]);
31850 mode = GET_MODE (out);
31851 neglab = gen_label_rtx ();
31852 donelab = gen_label_rtx ();
31853 f0 = gen_reg_rtx (mode);
31855 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
31857 expand_float (out, in, 0);
31859 emit_jump_insn (gen_jump (donelab));
31862 emit_label (neglab);
31864 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
31866 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
31868 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
31870 expand_float (f0, i0, 0);
31872 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
31874 emit_label (donelab);
31877 /* AVX2 supports 32-byte integer vector operations,
31878 thus the longest vector we are faced with is V32QImode. */
31879 #define MAX_VECT_LEN 32
31881 struct expand_vec_perm_d
31883 rtx target, op0, op1;
31884 unsigned char perm[MAX_VECT_LEN];
31885 enum machine_mode vmode;
31886 unsigned char nelt;
31890 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
31891 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
31893 /* Get a vector mode of the same size as the original but with elements
31894 twice as wide. This is only guaranteed to apply to integral vectors. */
31896 static inline enum machine_mode
31897 get_mode_wider_vector (enum machine_mode o)
31899 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
31900 enum machine_mode n = GET_MODE_WIDER_MODE (o);
31901 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
31902 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
31906 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
31907 with all elements equal to VAR. Return true if successful. */
31910 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
31911 rtx target, rtx val)
31934 /* First attempt to recognize VAL as-is. */
31935 dup = gen_rtx_VEC_DUPLICATE (mode, val);
31936 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
31937 if (recog_memoized (insn) < 0)
31940 /* If that fails, force VAL into a register. */
31943 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
31944 seq = get_insns ();
31947 emit_insn_before (seq, insn);
31949 ok = recog_memoized (insn) >= 0;
31958 if (TARGET_SSE || TARGET_3DNOW_A)
31962 val = gen_lowpart (SImode, val);
31963 x = gen_rtx_TRUNCATE (HImode, val);
31964 x = gen_rtx_VEC_DUPLICATE (mode, x);
31965 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31978 struct expand_vec_perm_d dperm;
31982 memset (&dperm, 0, sizeof (dperm));
31983 dperm.target = target;
31984 dperm.vmode = mode;
31985 dperm.nelt = GET_MODE_NUNITS (mode);
31986 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
31988 /* Extend to SImode using a paradoxical SUBREG. */
31989 tmp1 = gen_reg_rtx (SImode);
31990 emit_move_insn (tmp1, gen_lowpart (SImode, val));
31992 /* Insert the SImode value as low element of a V4SImode vector. */
31993 tmp2 = gen_lowpart (V4SImode, dperm.op0);
31994 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
31996 ok = (expand_vec_perm_1 (&dperm)
31997 || expand_vec_perm_broadcast_1 (&dperm));
32009 /* Replicate the value once into the next wider mode and recurse. */
32011 enum machine_mode smode, wsmode, wvmode;
32014 smode = GET_MODE_INNER (mode);
32015 wvmode = get_mode_wider_vector (mode);
32016 wsmode = GET_MODE_INNER (wvmode);
32018 val = convert_modes (wsmode, smode, val, true);
32019 x = expand_simple_binop (wsmode, ASHIFT, val,
32020 GEN_INT (GET_MODE_BITSIZE (smode)),
32021 NULL_RTX, 1, OPTAB_LIB_WIDEN);
32022 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
32024 x = gen_lowpart (wvmode, target);
32025 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
32033 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
32034 rtx x = gen_reg_rtx (hvmode);
32036 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
32039 x = gen_rtx_VEC_CONCAT (mode, x, x);
32040 emit_insn (gen_rtx_SET (VOIDmode, target, x));
32049 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
32050 whose ONE_VAR element is VAR, and other elements are zero. Return true
32054 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
32055 rtx target, rtx var, int one_var)
32057 enum machine_mode vsimode;
32060 bool use_vector_set = false;
32065 /* For SSE4.1, we normally use vector set. But if the second
32066 element is zero and inter-unit moves are OK, we use movq
32067 instead.  */
32068 use_vector_set = (TARGET_64BIT
32070 && !(TARGET_INTER_UNIT_MOVES
32076 use_vector_set = TARGET_SSE4_1;
32079 use_vector_set = TARGET_SSE2;
32082 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
32089 use_vector_set = TARGET_AVX;
32092 /* Use ix86_expand_vector_set in 64bit mode only. */
32093 use_vector_set = TARGET_AVX && TARGET_64BIT;
32099 if (use_vector_set)
32101 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
32102 var = force_reg (GET_MODE_INNER (mode), var);
32103 ix86_expand_vector_set (mmx_ok, target, var, one_var);
32119 var = force_reg (GET_MODE_INNER (mode), var);
32120 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
32121 emit_insn (gen_rtx_SET (VOIDmode, target, x));
32126 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
32127 new_target = gen_reg_rtx (mode);
32129 new_target = target;
32130 var = force_reg (GET_MODE_INNER (mode), var);
32131 x = gen_rtx_VEC_DUPLICATE (mode, var);
32132 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
32133 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
32136 /* We need to shuffle the value to the correct position, so
32137 create a new pseudo to store the intermediate result. */
32139 /* With SSE2, we can use the integer shuffle insns. */
32140 if (mode != V4SFmode && TARGET_SSE2)
32142 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
32144 GEN_INT (one_var == 1 ? 0 : 1),
32145 GEN_INT (one_var == 2 ? 0 : 1),
32146 GEN_INT (one_var == 3 ? 0 : 1)));
32147 if (target != new_target)
32148 emit_move_insn (target, new_target);
32152 /* Otherwise convert the intermediate result to V4SFmode and
32153 use the SSE1 shuffle instructions. */
32154 if (mode != V4SFmode)
32156 tmp = gen_reg_rtx (V4SFmode);
32157 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
32162 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
32164 GEN_INT (one_var == 1 ? 0 : 1),
32165 GEN_INT (one_var == 2 ? 0+4 : 1+4),
32166 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
32168 if (mode != V4SFmode)
32169 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
32170 else if (tmp != target)
32171 emit_move_insn (target, tmp);
32173 else if (target != new_target)
32174 emit_move_insn (target, new_target);
32179 vsimode = V4SImode;
32185 vsimode = V2SImode;
32191 /* Zero extend the variable element to SImode and recurse. */
32192 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
32194 x = gen_reg_rtx (vsimode);
32195 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
32197 gcc_unreachable ();
32199 emit_move_insn (target, gen_lowpart (mode, x));
32207 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
32208 consisting of the values in VALS. It is known that all elements
32209 except ONE_VAR are constants. Return true if successful. */
32212 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
32213 rtx target, rtx vals, int one_var)
32215 rtx var = XVECEXP (vals, 0, one_var);
32216 enum machine_mode wmode;
32219 const_vec = copy_rtx (vals);
32220 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
32221 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
32229 /* For the two element vectors, it's just as easy to use
32230 the general case. */
32234 /* Use ix86_expand_vector_set in 64bit mode only. */
32257 /* There's no way to set one QImode entry easily. Combine
32258 the variable value with its adjacent constant value, and
32259 promote to an HImode set. */
32260 x = XVECEXP (vals, 0, one_var ^ 1);
32263 var = convert_modes (HImode, QImode, var, true);
32264 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
32265 NULL_RTX, 1, OPTAB_LIB_WIDEN);
32266 x = GEN_INT (INTVAL (x) & 0xff);
32270 var = convert_modes (HImode, QImode, var, true);
32271 x = gen_int_mode (INTVAL (x) << 8, HImode);
32273 if (x != const0_rtx)
32274 var = expand_simple_binop (HImode, IOR, var, x, var,
32275 1, OPTAB_LIB_WIDEN);
32277 x = gen_reg_rtx (wmode);
32278 emit_move_insn (x, gen_lowpart (wmode, const_vec));
32279 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
32281 emit_move_insn (target, gen_lowpart (mode, x));
32288 emit_move_insn (target, const_vec);
32289 ix86_expand_vector_set (mmx_ok, target, var, one_var);
32293 /* A subroutine of ix86_expand_vector_init_general. Use vector
32294 concatenate to handle the most general case: all values variable,
32295 and none identical. */
32298 ix86_expand_vector_init_concat (enum machine_mode mode,
32299 rtx target, rtx *ops, int n)
32301 enum machine_mode cmode, hmode = VOIDmode;
32302 rtx first[8], second[4];
32342 gcc_unreachable ();
32345 if (!register_operand (ops[1], cmode))
32346 ops[1] = force_reg (cmode, ops[1]);
32347 if (!register_operand (ops[0], cmode))
32348 ops[0] = force_reg (cmode, ops[0]);
32349 emit_insn (gen_rtx_SET (VOIDmode, target,
32350 gen_rtx_VEC_CONCAT (mode, ops[0],
32370 gcc_unreachable ();
32386 gcc_unreachable ();
32391 /* FIXME: We process inputs backward to help RA. PR 36222. */
32394 for (; i > 0; i -= 2, j--)
32396 first[j] = gen_reg_rtx (cmode);
32397 v = gen_rtvec (2, ops[i - 1], ops[i]);
32398 ix86_expand_vector_init (false, first[j],
32399 gen_rtx_PARALLEL (cmode, v));
32405 gcc_assert (hmode != VOIDmode);
32406 for (i = j = 0; i < n; i += 2, j++)
32408 second[j] = gen_reg_rtx (hmode);
32409 ix86_expand_vector_init_concat (hmode, second [j],
32413 ix86_expand_vector_init_concat (mode, target, second, n);
32416 ix86_expand_vector_init_concat (mode, target, first, n);
32420 gcc_unreachable ();
32424 /* A subroutine of ix86_expand_vector_init_general. Use vector
32425 interleave to handle the most general case: all values variable,
32426 and none identical. */
32429 ix86_expand_vector_init_interleave (enum machine_mode mode,
32430 rtx target, rtx *ops, int n)
32432 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
32435 rtx (*gen_load_even) (rtx, rtx, rtx);
32436 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
32437 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
32442 gen_load_even = gen_vec_setv8hi;
32443 gen_interleave_first_low = gen_vec_interleave_lowv4si;
32444 gen_interleave_second_low = gen_vec_interleave_lowv2di;
32445 inner_mode = HImode;
32446 first_imode = V4SImode;
32447 second_imode = V2DImode;
32448 third_imode = VOIDmode;
32451 gen_load_even = gen_vec_setv16qi;
32452 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
32453 gen_interleave_second_low = gen_vec_interleave_lowv4si;
32454 inner_mode = QImode;
32455 first_imode = V8HImode;
32456 second_imode = V4SImode;
32457 third_imode = V2DImode;
32460 gcc_unreachable ();
32463 for (i = 0; i < n; i++)
32465 /* Extend the odd element to SImode using a paradoxical SUBREG. */
32466 op0 = gen_reg_rtx (SImode);
32467 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
32469 /* Insert the SImode value as low element of V4SImode vector. */
32470 op1 = gen_reg_rtx (V4SImode);
32471 op0 = gen_rtx_VEC_MERGE (V4SImode,
32472 gen_rtx_VEC_DUPLICATE (V4SImode,
32474 CONST0_RTX (V4SImode),
32476 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
32478 /* Cast the V4SImode vector back to a vector in the original mode. */
32479 op0 = gen_reg_rtx (mode);
32480 emit_move_insn (op0, gen_lowpart (mode, op1));
32482 /* Load even elements into the second position. */
32483 emit_insn (gen_load_even (op0,
32484 force_reg (inner_mode,
32488 /* Cast vector to FIRST_IMODE vector. */
32489 ops[i] = gen_reg_rtx (first_imode);
32490 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
32493 /* Interleave low FIRST_IMODE vectors. */
32494 for (i = j = 0; i < n; i += 2, j++)
32496 op0 = gen_reg_rtx (first_imode);
32497 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
32499 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
32500 ops[j] = gen_reg_rtx (second_imode);
32501 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
32504 /* Interleave low SECOND_IMODE vectors. */
32505 switch (second_imode)
32508 for (i = j = 0; i < n / 2; i += 2, j++)
32510 op0 = gen_reg_rtx (second_imode);
32511 emit_insn (gen_interleave_second_low (op0, ops[i],
32514 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
32516 ops[j] = gen_reg_rtx (third_imode);
32517 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
32519 second_imode = V2DImode;
32520 gen_interleave_second_low = gen_vec_interleave_lowv2di;
32524 op0 = gen_reg_rtx (second_imode);
32525 emit_insn (gen_interleave_second_low (op0, ops[0],
32528 /* Cast the SECOND_IMODE vector back to a vector in the original
32529 mode.  */
32530 emit_insn (gen_rtx_SET (VOIDmode, target,
32531 gen_lowpart (mode, op0)));
32535 gcc_unreachable ();
32539 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
32540 all values variable, and none identical. */
32543 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
32544 rtx target, rtx vals)
32546 rtx ops[32], op0, op1;
32547 enum machine_mode half_mode = VOIDmode;
32554 if (!mmx_ok && !TARGET_SSE)
32566 n = GET_MODE_NUNITS (mode);
32567 for (i = 0; i < n; i++)
32568 ops[i] = XVECEXP (vals, 0, i);
32569 ix86_expand_vector_init_concat (mode, target, ops, n);
32573 half_mode = V16QImode;
32577 half_mode = V8HImode;
32581 n = GET_MODE_NUNITS (mode);
32582 for (i = 0; i < n; i++)
32583 ops[i] = XVECEXP (vals, 0, i);
32584 op0 = gen_reg_rtx (half_mode);
32585 op1 = gen_reg_rtx (half_mode);
32586 ix86_expand_vector_init_interleave (half_mode, op0, ops,
32588 ix86_expand_vector_init_interleave (half_mode, op1,
32589 &ops [n >> 1], n >> 2);
32590 emit_insn (gen_rtx_SET (VOIDmode, target,
32591 gen_rtx_VEC_CONCAT (mode, op0, op1)));
32595 if (!TARGET_SSE4_1)
32603 /* Don't use ix86_expand_vector_init_interleave if we can't
32604 move from a GPR to an SSE register directly.  */
32605 if (!TARGET_INTER_UNIT_MOVES)
32608 n = GET_MODE_NUNITS (mode);
32609 for (i = 0; i < n; i++)
32610 ops[i] = XVECEXP (vals, 0, i);
32611 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
32619 gcc_unreachable ();
32623 int i, j, n_elts, n_words, n_elt_per_word;
32624 enum machine_mode inner_mode;
32625 rtx words[4], shift;
32627 inner_mode = GET_MODE_INNER (mode);
32628 n_elts = GET_MODE_NUNITS (mode);
32629 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
32630 n_elt_per_word = n_elts / n_words;
32631 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
32633 for (i = 0; i < n_words; ++i)
32635 rtx word = NULL_RTX;
32637 for (j = 0; j < n_elt_per_word; ++j)
32639 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
32640 elt = convert_modes (word_mode, inner_mode, elt, true);
32646 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
32647 word, 1, OPTAB_LIB_WIDEN);
32648 word = expand_simple_binop (word_mode, IOR, word, elt,
32649 word, 1, OPTAB_LIB_WIDEN);
32657 emit_move_insn (target, gen_lowpart (mode, words[0]));
32658 else if (n_words == 2)
32660 rtx tmp = gen_reg_rtx (mode);
32661 emit_clobber (tmp);
32662 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
32663 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
32664 emit_move_insn (target, tmp);
32666 else if (n_words == 4)
32668 rtx tmp = gen_reg_rtx (V4SImode);
32669 gcc_assert (word_mode == SImode);
32670 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
32671 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
32672 emit_move_insn (target, gen_lowpart (mode, tmp));
32675 gcc_unreachable ();
32679 /* Initialize vector TARGET via VALS. Suppress the use of MMX
32680 instructions unless MMX_OK is true. */
32683 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
32685 enum machine_mode mode = GET_MODE (target);
32686 enum machine_mode inner_mode = GET_MODE_INNER (mode);
32687 int n_elts = GET_MODE_NUNITS (mode);
32688 int n_var = 0, one_var = -1;
32689 bool all_same = true, all_const_zero = true;
32693 for (i = 0; i < n_elts; ++i)
32695 x = XVECEXP (vals, 0, i);
32696 if (!(CONST_INT_P (x)
32697 || GET_CODE (x) == CONST_DOUBLE
32698 || GET_CODE (x) == CONST_FIXED))
32699 n_var++, one_var = i;
32700 else if (x != CONST0_RTX (inner_mode))
32701 all_const_zero = false;
32702 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
32706 /* Constants are best loaded from the constant pool. */
32709 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
32713 /* If all values are identical, broadcast the value. */
32715 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
32716 XVECEXP (vals, 0, 0)))
32719 /* Values where only one field is non-constant are best loaded from
32720 the pool and overwritten via move later. */
32724 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
32725 XVECEXP (vals, 0, one_var),
32729 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
32733 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
32737 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
32739 enum machine_mode mode = GET_MODE (target);
32740 enum machine_mode inner_mode = GET_MODE_INNER (mode);
32741 enum machine_mode half_mode;
32742 bool use_vec_merge = false;
32744 static rtx (*gen_extract[6][2]) (rtx, rtx)
32746 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
32747 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
32748 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
32749 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
32750 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
32751 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
32753 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
32755 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
32756 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
32757 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
32758 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
32759 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
32760 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
32770 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
32771 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
32773 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
32775 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
32776 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32782 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
32786 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
32787 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
32789 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
32791 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
32792 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32799 /* For the two element vectors, we implement a VEC_CONCAT with
32800 the extraction of the other element. */
32802 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
32803 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
32806 op0 = val, op1 = tmp;
32808 op0 = tmp, op1 = val;
32810 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
32811 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32816 use_vec_merge = TARGET_SSE4_1;
32823 use_vec_merge = true;
32827 /* tmp = target = A B C D */
32828 tmp = copy_to_reg (target);
32829 /* target = A A B B */
32830 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
32831 /* target = X A B B */
32832 ix86_expand_vector_set (false, target, val, 0);
32833 /* target = A X C D */
32834 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
32835 const1_rtx, const0_rtx,
32836 GEN_INT (2+4), GEN_INT (3+4)));
32840 /* tmp = target = A B C D */
32841 tmp = copy_to_reg (target);
32842 /* tmp = X B C D */
32843 ix86_expand_vector_set (false, tmp, val, 0);
32844 /* target = A B X D */
32845 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
32846 const0_rtx, const1_rtx,
32847 GEN_INT (0+4), GEN_INT (3+4)));
32851 /* tmp = target = A B C D */
32852 tmp = copy_to_reg (target);
32853 /* tmp = X B C D */
32854 ix86_expand_vector_set (false, tmp, val, 0);
32855 /* target = A B C X */
32856 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
32857 const0_rtx, const1_rtx,
32858 GEN_INT (2+4), GEN_INT (0+4)));
32862 gcc_unreachable ();
32867 use_vec_merge = TARGET_SSE4_1;
32871 /* Element 0 handled by vec_merge below. */
32874 use_vec_merge = true;
32880 /* With SSE2, use integer shuffles to swap element 0 and ELT,
32881 store into element 0, then shuffle them back. */
32885 order[0] = GEN_INT (elt);
32886 order[1] = const1_rtx;
32887 order[2] = const2_rtx;
32888 order[3] = GEN_INT (3);
32889 order[elt] = const0_rtx;
32891 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
32892 order[1], order[2], order[3]));
32894 ix86_expand_vector_set (false, target, val, 0);
32896 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
32897 order[1], order[2], order[3]));
32901 /* For SSE1, we have to reuse the V4SF code. */
32902 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
32903 gen_lowpart (SFmode, val), elt);
32908 use_vec_merge = TARGET_SSE2;
32911 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
32915 use_vec_merge = TARGET_SSE4_1;
32922 half_mode = V16QImode;
32928 half_mode = V8HImode;
32934 half_mode = V4SImode;
32940 half_mode = V2DImode;
32946 half_mode = V4SFmode;
32952 half_mode = V2DFmode;
32958 /* Compute offset. */
32962 gcc_assert (i <= 1);
32964 /* Extract the half. */
32965 tmp = gen_reg_rtx (half_mode);
32966 emit_insn (gen_extract[j][i] (tmp, target));
32968 /* Put val in tmp at elt. */
32969 ix86_expand_vector_set (false, tmp, val, elt);
32972 emit_insn (gen_insert[j][i] (target, target, tmp));
32981 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
32982 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
32983 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32987 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
32989 emit_move_insn (mem, target);
32991 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
32992 emit_move_insn (tmp, val);
32994 emit_move_insn (target, mem);
32999 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
33001 enum machine_mode mode = GET_MODE (vec);
33002 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33003 bool use_vec_extr = false;
33016 use_vec_extr = true;
33020 use_vec_extr = TARGET_SSE4_1;
33032 tmp = gen_reg_rtx (mode);
33033 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
33034 GEN_INT (elt), GEN_INT (elt),
33035 GEN_INT (elt+4), GEN_INT (elt+4)));
33039 tmp = gen_reg_rtx (mode);
33040 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
33044 gcc_unreachable ();
33047 use_vec_extr = true;
33052 use_vec_extr = TARGET_SSE4_1;
33066 tmp = gen_reg_rtx (mode);
33067 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
33068 GEN_INT (elt), GEN_INT (elt),
33069 GEN_INT (elt), GEN_INT (elt)));
33073 tmp = gen_reg_rtx (mode);
33074 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
33078 gcc_unreachable ();
33081 use_vec_extr = true;
33086 /* For SSE1, we have to reuse the V4SF code. */
33087 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
33088 gen_lowpart (V4SFmode, vec), elt);
33094 use_vec_extr = TARGET_SSE2;
33097 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
33101 use_vec_extr = TARGET_SSE4_1;
33107 tmp = gen_reg_rtx (V4SFmode);
33109 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
33111 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
33112 ix86_expand_vector_extract (false, target, tmp, elt & 3);
33120 tmp = gen_reg_rtx (V2DFmode);
33122 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
33124 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
33125 ix86_expand_vector_extract (false, target, tmp, elt & 1);
33133 tmp = gen_reg_rtx (V16QImode);
33135 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
33137 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
33138 ix86_expand_vector_extract (false, target, tmp, elt & 15);
33146 tmp = gen_reg_rtx (V8HImode);
33148 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
33150 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
33151 ix86_expand_vector_extract (false, target, tmp, elt & 7);
33159 tmp = gen_reg_rtx (V4SImode);
33161 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
33163 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
33164 ix86_expand_vector_extract (false, target, tmp, elt & 3);
33172 tmp = gen_reg_rtx (V2DImode);
33174 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
33176 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
33177 ix86_expand_vector_extract (false, target, tmp, elt & 1);
33183 /* ??? Could extract the appropriate HImode element and shift. */
33190 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
33191 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
33193 /* Let the rtl optimizers know about the zero extension performed. */
33194 if (inner_mode == QImode || inner_mode == HImode)
33196 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
33197 target = gen_lowpart (SImode, target);
33200 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33204 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
33206 emit_move_insn (mem, vec);
33208 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
33209 emit_move_insn (target, tmp);
33213 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
33214 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
33215 The upper bits of DEST are undefined, though they shouldn't cause
33216 exceptions (some bits from src or all zeros are ok). */
33219 emit_reduc_half (rtx dest, rtx src, int i)
33222 switch (GET_MODE (src))
33226 tem = gen_sse_movhlps (dest, src, src);
33228 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
33229 GEN_INT (1 + 4), GEN_INT (1 + 4));
33232 tem = gen_vec_interleave_highv2df (dest, src, src);
33238 tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
33239 gen_lowpart (V1TImode, src),
33244 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
33246 tem = gen_avx_shufps256 (dest, src, src,
33247 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
33251 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
33253 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
33260 tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
33261 gen_lowpart (V4DImode, src),
33262 gen_lowpart (V4DImode, src),
33265 tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
33266 gen_lowpart (V2TImode, src),
33270 gcc_unreachable ();
33275 /* Expand a vector reduction. FN is the binary pattern to reduce;
33276 DEST is the destination; IN is the input vector. */
33279 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
33281 rtx half, dst, vec = in;
33282 enum machine_mode mode = GET_MODE (in);
33285 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
33287 && mode == V8HImode
33288 && fn == gen_uminv8hi3)
33290 emit_insn (gen_sse4_1_phminposuw (dest, in));
33294 for (i = GET_MODE_BITSIZE (mode);
33295 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
33298 half = gen_reg_rtx (mode);
33299 emit_reduc_half (half, vec, i);
33300 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
33303 dst = gen_reg_rtx (mode);
33304 emit_insn (fn (dst, half, vec));
33309 /* Target hook for scalar_mode_supported_p. */
33311 ix86_scalar_mode_supported_p (enum machine_mode mode)
33313 if (DECIMAL_FLOAT_MODE_P (mode))
33314 return default_decimal_float_supported_p ();
33315 else if (mode == TFmode)
33318 return default_scalar_mode_supported_p (mode);
33321 /* Implements target hook vector_mode_supported_p. */
33323 ix86_vector_mode_supported_p (enum machine_mode mode)
33325 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
33327 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
33329 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
33331 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
33333 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
33338 /* Target hook for c_mode_for_suffix. */
33339 static enum machine_mode
33340 ix86_c_mode_for_suffix (char suffix)
33350 /* Worker function for TARGET_MD_ASM_CLOBBERS.
33352 We do this in the new i386 backend to maintain source compatibility
33353 with the old cc0-based compiler. */
33356 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
33357 tree inputs ATTRIBUTE_UNUSED,
33360 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
33362 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
33367 /* Implements the target hook targetm.encode_section_info.  */
33369 static void ATTRIBUTE_UNUSED
33370 ix86_encode_section_info (tree decl, rtx rtl, int first)
33372 default_encode_section_info (decl, rtl, first);
33374 if (TREE_CODE (decl) == VAR_DECL
33375 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
33376 && ix86_in_large_data_p (decl))
33377 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
33380 /* Worker function for REVERSE_CONDITION. */
33383 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
33385 return (mode != CCFPmode && mode != CCFPUmode
33386 ? reverse_condition (code)
33387 : reverse_condition_maybe_unordered (code));
33390 /* Output code to perform an x87 FP register move, from OPERANDS[1]
33391 to OPERANDS[0].  */
33394 output_387_reg_move (rtx insn, rtx *operands)
33396 if (REG_P (operands[0]))
33398 if (REG_P (operands[1])
33399 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
33401 if (REGNO (operands[0]) == FIRST_STACK_REG)
33402 return output_387_ffreep (operands, 0);
33403 return "fstp\t%y0";
33405 if (STACK_TOP_P (operands[0]))
33406 return "fld%Z1\t%y1";
33409 else if (MEM_P (operands[0]))
33411 gcc_assert (REG_P (operands[1]));
33412 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
33413 return "fstp%Z0\t%y0";
33416 /* There is no non-popping store to memory for XFmode.
33417 So if we need one, follow the store with a load. */
33418 if (GET_MODE (operands[0]) == XFmode)
33419 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
33421 return "fst%Z0\t%y0";
33428 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
33429 the FP status register is set.  */
33432 ix86_emit_fp_unordered_jump (rtx label)
33434 rtx reg = gen_reg_rtx (HImode);
33437 emit_insn (gen_x86_fnstsw_1 (reg));
33439 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
33441 emit_insn (gen_x86_sahf_1 (reg));
33443 temp = gen_rtx_REG (CCmode, FLAGS_REG);
33444 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
33448 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
33450 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
33451 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
33454 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
33455 gen_rtx_LABEL_REF (VOIDmode, label),
33457 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
33459 emit_jump_insn (temp);
33460 predict_jump (REG_BR_PROB_BASE * 10 / 100);
33463 /* Output code to perform a log1p XFmode calculation. */
33465 void ix86_emit_i387_log1p (rtx op0, rtx op1)
33467 rtx label1 = gen_label_rtx ();
33468 rtx label2 = gen_label_rtx ();
33470 rtx tmp = gen_reg_rtx (XFmode);
33471 rtx tmp2 = gen_reg_rtx (XFmode);
33474 emit_insn (gen_absxf2 (tmp, op1));
33475 test = gen_rtx_GE (VOIDmode, tmp,
33476 CONST_DOUBLE_FROM_REAL_VALUE (
33477 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
33479 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
33481 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
33482 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
33483 emit_jump (label2);
33485 emit_label (label1);
33486 emit_move_insn (tmp, CONST1_RTX (XFmode));
33487 emit_insn (gen_addxf3 (tmp, op1, tmp));
33488 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
33489 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
33491 emit_label (label2);
33494 /* Emit code for the round calculation: op0 = round (op1).  */
33495 void ix86_emit_i387_round (rtx op0, rtx op1)
33497 enum machine_mode inmode = GET_MODE (op1);
33498 enum machine_mode outmode = GET_MODE (op0);
33499 rtx e1, e2, res, tmp, tmp1, half;
33500 rtx scratch = gen_reg_rtx (HImode);
33501 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
33502 rtx jump_label = gen_label_rtx ();
33504 rtx (*gen_abs) (rtx, rtx);
33505 rtx (*gen_neg) (rtx, rtx);
33510 gen_abs = gen_abssf2;
33513 gen_abs = gen_absdf2;
33516 gen_abs = gen_absxf2;
33519 gcc_unreachable ();
33525 gen_neg = gen_negsf2;
33528 gen_neg = gen_negdf2;
33531 gen_neg = gen_negxf2;
33534 gen_neg = gen_neghi2;
33537 gen_neg = gen_negsi2;
33540 gen_neg = gen_negdi2;
33543 gcc_unreachable ();
33546 e1 = gen_reg_rtx (inmode);
33547 e2 = gen_reg_rtx (inmode);
33548 res = gen_reg_rtx (outmode);
33550 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
33552 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
33554 /* scratch = fxam(op1) */
33555 emit_insn (gen_rtx_SET (VOIDmode, scratch,
33556 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
33558 /* e1 = fabs(op1) */
33559 emit_insn (gen_abs (e1, op1));
33561 /* e2 = e1 + 0.5 */
33562 half = force_reg (inmode, half);
33563 emit_insn (gen_rtx_SET (VOIDmode, e2,
33564 gen_rtx_PLUS (inmode, e1, half)));
33566 /* res = floor(e2) */
33567 if (inmode != XFmode)
33569 tmp1 = gen_reg_rtx (XFmode);
33571 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
33572 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
33582 rtx tmp0 = gen_reg_rtx (XFmode);
33584 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
33586 emit_insn (gen_rtx_SET (VOIDmode, res,
33587 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
33588 UNSPEC_TRUNC_NOOP)));
33592 emit_insn (gen_frndintxf2_floor (res, tmp1));
33595 emit_insn (gen_lfloorxfhi2 (res, tmp1));
33598 emit_insn (gen_lfloorxfsi2 (res, tmp1));
33601 emit_insn (gen_lfloorxfdi2 (res, tmp1));
33604 gcc_unreachable ();
33607 /* flags = signbit(a) */
33608 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
33610 /* if (flags) then res = -res */
33611 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
33612 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
33613 gen_rtx_LABEL_REF (VOIDmode, jump_label),
33615 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
33616 predict_jump (REG_BR_PROB_BASE * 50 / 100);
33617 JUMP_LABEL (insn) = jump_label;
33619 emit_insn (gen_neg (res, res));
33621 emit_label (jump_label);
33622 LABEL_NUSES (jump_label) = 1;
33624 emit_move_insn (op0, res);
33627 /* Output code to perform a Newton-Raphson approximation of a single precision
33628 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
33630 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
33632 rtx x0, x1, e0, e1;
33634 x0 = gen_reg_rtx (mode);
33635 e0 = gen_reg_rtx (mode);
33636 e1 = gen_reg_rtx (mode);
33637 x1 = gen_reg_rtx (mode);
33639 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
33641 /* x0 = rcp(b) estimate */
33642 emit_insn (gen_rtx_SET (VOIDmode, x0,
33643 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
33646 emit_insn (gen_rtx_SET (VOIDmode, e0,
33647 gen_rtx_MULT (mode, x0, b)));
33650 emit_insn (gen_rtx_SET (VOIDmode, e0,
33651 gen_rtx_MULT (mode, x0, e0)));
33654 emit_insn (gen_rtx_SET (VOIDmode, e1,
33655 gen_rtx_PLUS (mode, x0, x0)));
33658 emit_insn (gen_rtx_SET (VOIDmode, x1,
33659 gen_rtx_MINUS (mode, e1, e0)));
33662 emit_insn (gen_rtx_SET (VOIDmode, res,
33663 gen_rtx_MULT (mode, a, x1)));
33666 /* Output code to perform a Newton-Raphson approximation of a
33667 single precision floating point [reciprocal] square root. */
33669 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
33672 rtx x0, e0, e1, e2, e3, mthree, mhalf;
33675 x0 = gen_reg_rtx (mode);
33676 e0 = gen_reg_rtx (mode);
33677 e1 = gen_reg_rtx (mode);
33678 e2 = gen_reg_rtx (mode);
33679 e3 = gen_reg_rtx (mode);
33681 real_from_integer (&r, VOIDmode, -3, -1, 0);
33682 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
33684 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
33685 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
33687 if (VECTOR_MODE_P (mode))
33689 mthree = ix86_build_const_vector (mode, true, mthree);
33690 mhalf = ix86_build_const_vector (mode, true, mhalf);
33693 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
33694 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
33696 /* x0 = rsqrt(a) estimate */
33697 emit_insn (gen_rtx_SET (VOIDmode, x0,
33698 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
33701 /* If a == 0.0, mask out the infinite rsqrt(0.0) estimate so that sqrt(0.0) does not evaluate 0.0 * Inf = NaN. */
33706 zero = gen_reg_rtx (mode);
33707 mask = gen_reg_rtx (mode);
33709 zero = force_reg (mode, CONST0_RTX(mode));
33710 emit_insn (gen_rtx_SET (VOIDmode, mask,
33711 gen_rtx_NE (mode, zero, a)));
33713 emit_insn (gen_rtx_SET (VOIDmode, x0,
33714 gen_rtx_AND (mode, x0, mask)));
33718 emit_insn (gen_rtx_SET (VOIDmode, e0,
33719 gen_rtx_MULT (mode, x0, a)));
33721 emit_insn (gen_rtx_SET (VOIDmode, e1,
33722 gen_rtx_MULT (mode, e0, x0)));
33725 mthree = force_reg (mode, mthree);
33726 emit_insn (gen_rtx_SET (VOIDmode, e2,
33727 gen_rtx_PLUS (mode, e1, mthree)));
33729 mhalf = force_reg (mode, mhalf);
33731 /* e3 = -.5 * x0 */
33732 emit_insn (gen_rtx_SET (VOIDmode, e3,
33733 gen_rtx_MULT (mode, x0, mhalf)));
33735 /* e3 = -.5 * e0 */
33736 emit_insn (gen_rtx_SET (VOIDmode, e3,
33737 gen_rtx_MULT (mode, e0, mhalf)));
33738 /* ret = e2 * e3 */
33739 emit_insn (gen_rtx_SET (VOIDmode, res,
33740 gen_rtx_MULT (mode, e2, e3)));
33743 #ifdef TARGET_SOLARIS
33744 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
33747 i386_solaris_elf_named_section (const char *name, unsigned int flags,
33750 /* With Binutils 2.15, the "@unwind" marker must be specified on
33751 every occurrence of the ".eh_frame" section, not just the first
33754 && strcmp (name, ".eh_frame") == 0)
33756 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
33757 flags & SECTION_WRITE ? "aw" : "a");
33762 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
33764 solaris_elf_asm_comdat_section (name, flags, decl);
33769 default_elf_asm_named_section (name, flags, decl);
33771 #endif /* TARGET_SOLARIS */
33773 /* Return the mangling of TYPE if it is an extended fundamental type. */
33775 static const char *
33776 ix86_mangle_type (const_tree type)
33778 type = TYPE_MAIN_VARIANT (type);
33780 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
33781 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
33784 switch (TYPE_MODE (type))
33787 /* __float128 is "g". */
33790 /* "long double" or __float80 is "e". */
33797 /* For 32-bit code we can save PIC register setup by using
33798 __stack_chk_fail_local hidden function instead of calling
__stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
33800 register, so it is better to call __stack_chk_fail directly. */
33802 static tree ATTRIBUTE_UNUSED
33803 ix86_stack_protect_fail (void)
33805 return TARGET_64BIT
33806 ? default_external_stack_protect_fail ()
33807 : default_hidden_stack_protect_fail ();
33810 /* Select a format to encode pointers in exception handling data. CODE
33811 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
33812 true if the symbol may be affected by dynamic relocations.
33814 ??? All x86 object file formats are capable of representing this.
33815 After all, the relocation needed is the same as for the call insn.
33816 Whether or not a particular assembler allows us to enter such, I
33817 guess we'll have to see. */
33819 asm_preferred_eh_data_format (int code, int global)
33823 int type = DW_EH_PE_sdata8;
33825 || ix86_cmodel == CM_SMALL_PIC
33826 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
33827 type = DW_EH_PE_sdata4;
33828 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
33830 if (ix86_cmodel == CM_SMALL
33831 || (ix86_cmodel == CM_MEDIUM && code))
33832 return DW_EH_PE_udata4;
33833 return DW_EH_PE_absptr;
33836 /* Expand copysign from SIGN to the positive value ABS_VALUE
33837 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
33840 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
33842 enum machine_mode mode = GET_MODE (sign);
33843 rtx sgn = gen_reg_rtx (mode);
33844 if (mask == NULL_RTX)
33846 enum machine_mode vmode;
33848 if (mode == SFmode)
33850 else if (mode == DFmode)
33855 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
33856 if (!VECTOR_MODE_P (mode))
33858 /* We need to generate a scalar mode mask in this case. */
33859 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
33860 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
33861 mask = gen_reg_rtx (mode);
33862 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
33866 mask = gen_rtx_NOT (mode, mask);
33867 emit_insn (gen_rtx_SET (VOIDmode, sgn,
33868 gen_rtx_AND (mode, mask, sign)));
33869 emit_insn (gen_rtx_SET (VOIDmode, result,
33870 gen_rtx_IOR (mode, abs_value, sgn)));
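/* What the two emitted insns compute, as a scalar bit sketch
   (illustrative, SFmode): with the sign-bit mask m = 0x80000000,

     sgn    = sign & m;
     result = abs_value | sgn;

   i.e. the sign bit of SIGN is copied onto the already-positive
   ABS_VALUE.  When the inverted fabs mask is passed in instead, the
   NOT above turns it back into the sign-bit mask first.  */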
33873 /* Expand fabs (OP0) and return a new rtx that holds the result. The
33874 mask for masking out the sign-bit is stored in *SMASK, if that is
33877 ix86_expand_sse_fabs (rtx op0, rtx *smask)
33879 enum machine_mode vmode, mode = GET_MODE (op0);
33882 xa = gen_reg_rtx (mode);
33883 if (mode == SFmode)
33885 else if (mode == DFmode)
33889 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
33890 if (!VECTOR_MODE_P (mode))
33892 /* We need to generate a scalar mode mask in this case. */
33893 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
33894 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
33895 mask = gen_reg_rtx (mode);
33896 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
33898 emit_insn (gen_rtx_SET (VOIDmode, xa,
33899 gen_rtx_AND (mode, op0, mask)));
33907 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
33908 swapping the operands if SWAP_OPERANDS is true. The expanded
33909 code is a forward jump to a newly created label in case the
33910 comparison is true. The generated label rtx is returned. */
33912 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
33913 bool swap_operands)
33924 label = gen_label_rtx ();
33925 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
33926 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33927 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
33928 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
33929 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
33930 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
33931 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
33932 JUMP_LABEL (tmp) = label;
33937 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
33938 using comparison code CODE. Operands are swapped for the comparison if
33939 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
33941 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
33942 bool swap_operands)
33944 rtx (*insn)(rtx, rtx, rtx, rtx);
33945 enum machine_mode mode = GET_MODE (op0);
33946 rtx mask = gen_reg_rtx (mode);
33955 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
33957 emit_insn (insn (mask, op0, op1,
33958 gen_rtx_fmt_ee (code, mode, op0, op1)));
33962 /* Generate and return a rtx of mode MODE for 2**n where n is the number
33963 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
33965 ix86_gen_TWO52 (enum machine_mode mode)
33967 REAL_VALUE_TYPE TWO52r;
33970 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
33971 TWO52 = const_double_from_real_value (TWO52r, mode);
33972 TWO52 = force_reg (mode, TWO52);
33977 /* Expand SSE sequence for computing lround from OP1 storing
33980 ix86_expand_lround (rtx op0, rtx op1)
33982 /* C code for the stuff we're doing below:
33983 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
33986 enum machine_mode mode = GET_MODE (op1);
33987 const struct real_format *fmt;
33988 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
33991 /* load nextafter (0.5, 0.0) */
33992 fmt = REAL_MODE_FORMAT (mode);
33993 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
33994 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
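/* pred_half is 0.5 - 2**(-p-1), the largest representable value
   strictly below 0.5 in a p-bit-mantissa format (0.5 - 2**-54 for
   DFmode).  Adding it instead of 0.5 keeps inputs just below a
   half-way point from being rounded up by the addition itself.  */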
33996 /* adj = copysign (0.5, op1) */
33997 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
33998 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
34000 /* adj = op1 + adj */
34001 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
34003 /* op0 = (imode)adj */
34004 expand_fix (op0, adj, 0);
/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
34010 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
34012 /* C code for the stuff we're doing below (for do_floor):
34014 xi -= (double)xi > op1 ? 1 : 0;
34017 enum machine_mode fmode = GET_MODE (op1);
34018 enum machine_mode imode = GET_MODE (op0);
34019 rtx ireg, freg, label, tmp;
34021 /* reg = (long)op1 */
34022 ireg = gen_reg_rtx (imode);
34023 expand_fix (ireg, op1, 0);
34025 /* freg = (double)reg */
34026 freg = gen_reg_rtx (fmode);
34027 expand_float (freg, ireg, 0);
34029 /* ireg = (freg > op1) ? ireg - 1 : ireg */
34030 label = ix86_expand_sse_compare_and_jump (UNLE,
34031 freg, op1, !do_floor);
34032 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
34033 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
34034 emit_move_insn (ireg, tmp);
34036 emit_label (label);
34037 LABEL_NUSES (label) = 1;
34039 emit_move_insn (op0, ireg);
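/* Worked example of the floor path above (illustrative): for
   op1 == -1.5 the fix conversion truncates toward zero, so
   ireg == -1 and freg == -1.0; since -1.0 UNLE -1.5 is false, the
   branch falls through and the compensation subtracts 1, giving
   the correct lfloor of -2.  */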
34042 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
34043 result in OPERAND0. */
34045 ix86_expand_rint (rtx operand0, rtx operand1)
34047 /* C code for the stuff we're doing below:
34048 xa = fabs (operand1);
34049 if (!isless (xa, 2**52))
34051 xa = xa + 2**52 - 2**52;
34052 return copysign (xa, operand1);
34054 enum machine_mode mode = GET_MODE (operand0);
34055 rtx res, xa, label, TWO52, mask;
34057 res = gen_reg_rtx (mode);
34058 emit_move_insn (res, operand1);
34060 /* xa = abs (operand1) */
34061 xa = ix86_expand_sse_fabs (res, &mask);
34063 /* if (!isless (xa, TWO52)) goto label; */
34064 TWO52 = ix86_gen_TWO52 (mode);
34065 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34067 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
34068 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
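/* The add/sub pair just emitted is the classic 2**52 trick: for
   0 <= xa < 2**52 (DFmode; 2**23 for SFmode) the sum xa + TWO52
   lies in [2**52, 2**53), where the ulp is exactly 1.0, so the
   addition itself rounds xa to an integer in the current rounding
   mode, and the subtraction then recovers that integer exactly.
   Scalar sketch (illustrative only):

     double t = xa + 0x1p52;
     xa = t - 0x1p52;   */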
34070 ix86_sse_copysign_to_positive (res, xa, res, mask);
34072 emit_label (label);
34073 LABEL_NUSES (label) = 1;
34075 emit_move_insn (operand0, res);
34078 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
34081 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
34083 /* C code for the stuff we expand below.
34084 double xa = fabs (x), x2;
34085 if (!isless (xa, TWO52))
34087 xa = xa + TWO52 - TWO52;
34088 x2 = copysign (xa, x);
34097 enum machine_mode mode = GET_MODE (operand0);
34098 rtx xa, TWO52, tmp, label, one, res, mask;
34100 TWO52 = ix86_gen_TWO52 (mode);
34102 /* Temporary for holding the result, initialized to the input
34103 operand to ease control flow. */
34104 res = gen_reg_rtx (mode);
34105 emit_move_insn (res, operand1);
34107 /* xa = abs (operand1) */
34108 xa = ix86_expand_sse_fabs (res, &mask);
34110 /* if (!isless (xa, TWO52)) goto label; */
34111 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34113 /* xa = xa + TWO52 - TWO52; */
34114 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
34115 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
34117 /* xa = copysign (xa, operand1) */
34118 ix86_sse_copysign_to_positive (xa, xa, res, mask);
34120 /* generate 1.0 or -1.0 */
34121 one = force_reg (mode,
34122 const_double_from_real_value (do_floor
34123 ? dconst1 : dconstm1, mode));
34125 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
34126 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
34127 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34128 gen_rtx_AND (mode, one, tmp)));
34129 /* We always need to subtract here to preserve signed zero. */
34130 tmp = expand_simple_binop (mode, MINUS,
34131 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
34132 emit_move_insn (res, tmp);
34134 emit_label (label);
34135 LABEL_NUSES (label) = 1;
34137 emit_move_insn (operand0, res);
34140 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
34143 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
34145 /* C code for the stuff we expand below.
34146 double xa = fabs (x), x2;
34147 if (!isless (xa, TWO52))
34149 x2 = (double)(long)x;
34156 if (HONOR_SIGNED_ZEROS (mode))
34157 return copysign (x2, x);
34160 enum machine_mode mode = GET_MODE (operand0);
34161 rtx xa, xi, TWO52, tmp, label, one, res, mask;
34163 TWO52 = ix86_gen_TWO52 (mode);
34165 /* Temporary for holding the result, initialized to the input
34166 operand to ease control flow. */
34167 res = gen_reg_rtx (mode);
34168 emit_move_insn (res, operand1);
34170 /* xa = abs (operand1) */
34171 xa = ix86_expand_sse_fabs (res, &mask);
34173 /* if (!isless (xa, TWO52)) goto label; */
34174 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34176 /* xa = (double)(long)x */
34177 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
34178 expand_fix (xi, res, 0);
34179 expand_float (xa, xi, 0);
34182 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
34184 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
34185 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
34186 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34187 gen_rtx_AND (mode, one, tmp)));
34188 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
34189 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
34190 emit_move_insn (res, tmp);
34192 if (HONOR_SIGNED_ZEROS (mode))
34193 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
34195 emit_label (label);
34196 LABEL_NUSES (label) = 1;
34198 emit_move_insn (operand0, res);
34201 /* Expand SSE sequence for computing round from OPERAND1 storing
into OPERAND0; a sequence that works without relying on DImode truncation
via cvttsd2siq, which is only available on 64-bit targets.  */
34205 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
34207 /* C code for the stuff we expand below.
34208 double xa = fabs (x), xa2, x2;
34209 if (!isless (xa, TWO52))
34211 Using the absolute value and copying back sign makes
34212 -0.0 -> -0.0 correct.
34213 xa2 = xa + TWO52 - TWO52;
34218 else if (dxa > 0.5)
34220 x2 = copysign (xa2, x);
34223 enum machine_mode mode = GET_MODE (operand0);
34224 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
34226 TWO52 = ix86_gen_TWO52 (mode);
34228 /* Temporary for holding the result, initialized to the input
34229 operand to ease control flow. */
34230 res = gen_reg_rtx (mode);
34231 emit_move_insn (res, operand1);
34233 /* xa = abs (operand1) */
34234 xa = ix86_expand_sse_fabs (res, &mask);
34236 /* if (!isless (xa, TWO52)) goto label; */
34237 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34239 /* xa2 = xa + TWO52 - TWO52; */
34240 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
34241 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
34243 /* dxa = xa2 - xa; */
34244 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
34246 /* generate 0.5, 1.0 and -0.5 */
34247 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
34248 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
34249 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
34253 tmp = gen_reg_rtx (mode);
34254 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
34255 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
34256 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34257 gen_rtx_AND (mode, one, tmp)));
34258 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
34259 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
34260 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
34261 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34262 gen_rtx_AND (mode, one, tmp)));
34263 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
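/* dxa is the signed error of the TWO52 rounding above, so
   |dxa| <= 0.5 with equality only at half-way points.  A tie that
   round-to-nearest-even resolved downward (dxa == -0.5) is bumped
   back up here, and the dxa > 0.5 test guards the opposite
   direction, turning the result into round-half-away-from-zero on
   the absolute value; copysign below restores the sign.  */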
34265 /* res = copysign (xa2, operand1) */
34266 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
34268 emit_label (label);
34269 LABEL_NUSES (label) = 1;
34271 emit_move_insn (operand0, res);
34274 /* Expand SSE sequence for computing trunc from OPERAND1 storing
34277 ix86_expand_trunc (rtx operand0, rtx operand1)
34279 /* C code for SSE variant we expand below.
34280 double xa = fabs (x), x2;
34281 if (!isless (xa, TWO52))
34283 x2 = (double)(long)x;
34284 if (HONOR_SIGNED_ZEROS (mode))
34285 return copysign (x2, x);
34288 enum machine_mode mode = GET_MODE (operand0);
34289 rtx xa, xi, TWO52, label, res, mask;
34291 TWO52 = ix86_gen_TWO52 (mode);
34293 /* Temporary for holding the result, initialized to the input
34294 operand to ease control flow. */
34295 res = gen_reg_rtx (mode);
34296 emit_move_insn (res, operand1);
34298 /* xa = abs (operand1) */
34299 xa = ix86_expand_sse_fabs (res, &mask);
34301 /* if (!isless (xa, TWO52)) goto label; */
34302 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34304 /* x = (double)(long)x */
34305 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
34306 expand_fix (xi, res, 0);
34307 expand_float (res, xi, 0);
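/* Worked example (illustrative): for operand1 == -1.7 the fix/float
   round trip computes (double) (long) -1.7 == -1.0, because the fix
   conversion truncates toward zero, which is exactly trunc.  */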
34309 if (HONOR_SIGNED_ZEROS (mode))
34310 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
34312 emit_label (label);
34313 LABEL_NUSES (label) = 1;
34315 emit_move_insn (operand0, res);
34318 /* Expand SSE sequence for computing trunc from OPERAND1 storing
34321 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
34323 enum machine_mode mode = GET_MODE (operand0);
34324 rtx xa, mask, TWO52, label, one, res, smask, tmp;
34326 /* C code for SSE variant we expand below.
34327 double xa = fabs (x), x2;
34328 if (!isless (xa, TWO52))
34330 xa2 = xa + TWO52 - TWO52;
34334 x2 = copysign (xa2, x);
34338 TWO52 = ix86_gen_TWO52 (mode);
34340 /* Temporary for holding the result, initialized to the input
34341 operand to ease control flow. */
34342 res = gen_reg_rtx (mode);
34343 emit_move_insn (res, operand1);
34345 /* xa = abs (operand1) */
34346 xa = ix86_expand_sse_fabs (res, &smask);
34348 /* if (!isless (xa, TWO52)) goto label; */
34349 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34351 /* res = xa + TWO52 - TWO52; */
34352 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
34353 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
34354 emit_move_insn (res, tmp);
34357 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
34359 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
34360 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
34361 emit_insn (gen_rtx_SET (VOIDmode, mask,
34362 gen_rtx_AND (mode, mask, one)));
34363 tmp = expand_simple_binop (mode, MINUS,
34364 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
34365 emit_move_insn (res, tmp);
34367 /* res = copysign (res, operand1) */
34368 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
34370 emit_label (label);
34371 LABEL_NUSES (label) = 1;
34373 emit_move_insn (operand0, res);
34376 /* Expand SSE sequence for computing round from OPERAND1 storing
34379 ix86_expand_round (rtx operand0, rtx operand1)
34381 /* C code for the stuff we're doing below:
34382 double xa = fabs (x);
34383 if (!isless (xa, TWO52))
34385 xa = (double)(long)(xa + nextafter (0.5, 0.0));
34386 return copysign (xa, x);
34388 enum machine_mode mode = GET_MODE (operand0);
34389 rtx res, TWO52, xa, label, xi, half, mask;
34390 const struct real_format *fmt;
34391 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
34393 /* Temporary for holding the result, initialized to the input
34394 operand to ease control flow. */
34395 res = gen_reg_rtx (mode);
34396 emit_move_insn (res, operand1);
34398 TWO52 = ix86_gen_TWO52 (mode);
34399 xa = ix86_expand_sse_fabs (res, &mask);
34400 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34402 /* load nextafter (0.5, 0.0) */
34403 fmt = REAL_MODE_FORMAT (mode);
34404 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
34405 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
34407 /* xa = xa + 0.5 */
34408 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
34409 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
34411 /* xa = (double)(int64_t)xa */
34412 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
34413 expand_fix (xi, xa, 0);
34414 expand_float (xa, xi, 0);
34416 /* res = copysign (xa, operand1) */
34417 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
34419 emit_label (label);
34420 LABEL_NUSES (label) = 1;
34422 emit_move_insn (operand0, res);
34425 /* Expand SSE sequence for computing round
34426 from OP1 storing into OP0 using sse4 round insn. */
34428 ix86_expand_round_sse4 (rtx op0, rtx op1)
34430 enum machine_mode mode = GET_MODE (op0);
34431 rtx e1, e2, res, half;
34432 const struct real_format *fmt;
34433 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
34434 rtx (*gen_copysign) (rtx, rtx, rtx);
34435 rtx (*gen_round) (rtx, rtx, rtx);
34440 gen_copysign = gen_copysignsf3;
34441 gen_round = gen_sse4_1_roundsf2;
34444 gen_copysign = gen_copysigndf3;
34445 gen_round = gen_sse4_1_rounddf2;
34448 gcc_unreachable ();
34451 /* round (a) = trunc (a + copysign (0.5, a)) */
34453 /* load nextafter (0.5, 0.0) */
34454 fmt = REAL_MODE_FORMAT (mode);
34455 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
34456 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
34457 half = const_double_from_real_value (pred_half, mode);
34459 /* e1 = copysign (0.5, op1) */
34460 e1 = gen_reg_rtx (mode);
34461 emit_insn (gen_copysign (e1, half, op1));
34463 /* e2 = op1 + e1 */
34464 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
34466 /* res = trunc (e2) */
34467 res = gen_reg_rtx (mode);
34468 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
34470 emit_move_insn (op0, res);
34474 /* Table of valid machine attributes. */
34475 static const struct attribute_spec ix86_attribute_table[] =
34477 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
34478 affects_type_identity } */
34479 /* Stdcall attribute says callee is responsible for popping arguments
34480 if they are not variable. */
34481 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34483 /* Fastcall attribute says callee is responsible for popping arguments
34484 if they are not variable. */
34485 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34487 /* Thiscall attribute says callee is responsible for popping arguments
34488 if they are not variable. */
34489 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
/* Cdecl attribute says the callee is a normal C declaration.  */
34492 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34494 /* Regparm attribute specifies how many integer arguments are to be
34495 passed in registers. */
34496 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
34498 /* Sseregparm attribute says we are using x86_64 calling conventions
34499 for FP arguments. */
34500 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34502 /* force_align_arg_pointer says this function realigns the stack at entry. */
34503 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
34504 false, true, true, ix86_handle_cconv_attribute, false },
34505 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34506 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
34507 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
34508 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
34511 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
34513 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
34515 #ifdef SUBTARGET_ATTRIBUTE_TABLE
34516 SUBTARGET_ATTRIBUTE_TABLE,
34518 /* ms_abi and sysv_abi calling convention function attributes. */
34519 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
34520 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
34521 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
34523 { "callee_pop_aggregate_return", 1, 1, false, true, true,
34524 ix86_handle_callee_pop_aggregate_return, true },
34526 { NULL, 0, 0, false, false, false, NULL, false }
34529 /* Implement targetm.vectorize.builtin_vectorization_cost. */
34531 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
34532 tree vectype ATTRIBUTE_UNUSED,
34533 int misalign ATTRIBUTE_UNUSED)
34535 switch (type_of_cost)
34538 return ix86_cost->scalar_stmt_cost;
34541 return ix86_cost->scalar_load_cost;
34544 return ix86_cost->scalar_store_cost;
34547 return ix86_cost->vec_stmt_cost;
34550 return ix86_cost->vec_align_load_cost;
34553 return ix86_cost->vec_store_cost;
34555 case vec_to_scalar:
34556 return ix86_cost->vec_to_scalar_cost;
34558 case scalar_to_vec:
34559 return ix86_cost->scalar_to_vec_cost;
34561 case unaligned_load:
34562 case unaligned_store:
34563 return ix86_cost->vec_unalign_load_cost;
34565 case cond_branch_taken:
34566 return ix86_cost->cond_taken_branch_cost;
34568 case cond_branch_not_taken:
34569 return ix86_cost->cond_not_taken_branch_cost;
34575 gcc_unreachable ();
34580 /* Return a vector mode with twice as many elements as VMODE. */
34581 /* ??? Consider moving this to a table generated by genmodes.c. */
34583 static enum machine_mode
34584 doublesize_vector_mode (enum machine_mode vmode)
34588 case V2SFmode: return V4SFmode;
34589 case V1DImode: return V2DImode;
34590 case V2SImode: return V4SImode;
34591 case V4HImode: return V8HImode;
34592 case V8QImode: return V16QImode;
34594 case V2DFmode: return V4DFmode;
34595 case V4SFmode: return V8SFmode;
34596 case V2DImode: return V4DImode;
34597 case V4SImode: return V8SImode;
34598 case V8HImode: return V16HImode;
34599 case V16QImode: return V32QImode;
34601 case V4DFmode: return V8DFmode;
34602 case V8SFmode: return V16SFmode;
34603 case V4DImode: return V8DImode;
34604 case V8SImode: return V16SImode;
34605 case V16HImode: return V32HImode;
34606 case V32QImode: return V64QImode;
34609 gcc_unreachable ();
34613 /* Construct (set target (vec_select op0 (parallel perm))) and
34614 return true if that's a valid instruction in the active ISA. */
34617 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
34619 rtx rperm[MAX_VECT_LEN], x;
34622 for (i = 0; i < nelt; ++i)
34623 rperm[i] = GEN_INT (perm[i]);
34625 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
34626 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
34627 x = gen_rtx_SET (VOIDmode, target, x);
34630 if (recog_memoized (x) < 0)
34638 /* Similar, but generate a vec_concat from op0 and op1 as well. */
34641 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
34642 const unsigned char *perm, unsigned nelt)
34644 enum machine_mode v2mode;
34647 v2mode = doublesize_vector_mode (GET_MODE (op0));
34648 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
34649 return expand_vselect (target, x, perm, nelt);
34652 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34653 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
34656 expand_vec_perm_blend (struct expand_vec_perm_d *d)
34658 enum machine_mode vmode = d->vmode;
34659 unsigned i, mask, nelt = d->nelt;
34660 rtx target, op0, op1, x;
34661 rtx rperm[32], vperm;
34663 if (d->op0 == d->op1)
34665 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
34667 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
34669 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
34674 /* This is a blend, not a permute. Elements must stay in their
34675 respective lanes. */
34676 for (i = 0; i < nelt; ++i)
34678 unsigned e = d->perm[i];
34679 if (!(e == i || e == i + nelt))
34686 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
34687 decision should be extracted elsewhere, so that we only try that
34688 sequence once all budget==3 options have been tried. */
34689 target = d->target;
34702 for (i = 0; i < nelt; ++i)
34703 mask |= (d->perm[i] >= nelt) << i;
34707 for (i = 0; i < 2; ++i)
34708 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
34713 for (i = 0; i < 4; ++i)
34714 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
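/* Worked example (illustrative): a V4SI blend whose perm is
   { 0, 5, 2, 7 } takes elements 1 and 3 from op1, so the
   two-bit groups above yield mask == 0xcc for pblendw.  */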
34719 /* See if bytes move in pairs so we can use pblendw with
34720 an immediate argument, rather than pblendvb with a vector
34722 for (i = 0; i < 16; i += 2)
34723 if (d->perm[i] + 1 != d->perm[i + 1])
34726 for (i = 0; i < nelt; ++i)
34727 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
34730 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
34731 vperm = force_reg (vmode, vperm);
34733 if (GET_MODE_SIZE (vmode) == 16)
34734 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
34736 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
34740 for (i = 0; i < 8; ++i)
34741 mask |= (d->perm[i * 2] >= 16) << i;
34746 target = gen_lowpart (vmode, target);
34747 op0 = gen_lowpart (vmode, op0);
34748 op1 = gen_lowpart (vmode, op1);
34752 /* See if bytes move in pairs. If not, vpblendvb must be used. */
34753 for (i = 0; i < 32; i += 2)
34754 if (d->perm[i] + 1 != d->perm[i + 1])
34756 /* See if bytes move in quadruplets. If yes, vpblendd
34757 with immediate can be used. */
34758 for (i = 0; i < 32; i += 4)
34759 if (d->perm[i] + 2 != d->perm[i + 2])
34763 /* See if bytes move the same in both lanes. If yes,
34764 vpblendw with immediate can be used. */
34765 for (i = 0; i < 16; i += 2)
34766 if (d->perm[i] + 16 != d->perm[i + 16])
34769 /* Use vpblendw. */
34770 for (i = 0; i < 16; ++i)
34771 mask |= (d->perm[i * 2] >= 32) << i;
34776 /* Use vpblendd. */
34777 for (i = 0; i < 8; ++i)
34778 mask |= (d->perm[i * 4] >= 32) << i;
34783 /* See if words move in pairs. If yes, vpblendd can be used. */
34784 for (i = 0; i < 16; i += 2)
34785 if (d->perm[i] + 1 != d->perm[i + 1])
34789 /* See if words move the same in both lanes. If not,
34790 vpblendvb must be used. */
34791 for (i = 0; i < 8; i++)
34792 if (d->perm[i] + 8 != d->perm[i + 8])
34794 /* Use vpblendvb. */
34795 for (i = 0; i < 32; ++i)
34796 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
34800 target = gen_lowpart (vmode, target);
34801 op0 = gen_lowpart (vmode, op0);
34802 op1 = gen_lowpart (vmode, op1);
34803 goto finish_pblendvb;
34806 /* Use vpblendw. */
34807 for (i = 0; i < 16; ++i)
34808 mask |= (d->perm[i] >= 16) << i;
34812 /* Use vpblendd. */
34813 for (i = 0; i < 8; ++i)
34814 mask |= (d->perm[i * 2] >= 16) << i;
34819 /* Use vpblendd. */
34820 for (i = 0; i < 4; ++i)
34821 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
34826 gcc_unreachable ();
34829 /* This matches five different patterns with the different modes. */
34830 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
34831 x = gen_rtx_SET (VOIDmode, target, x);
34837 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34838 in terms of the variable form of vpermilps.
34840 Note that we will have already failed the immediate input vpermilps,
34841 which requires that the high and low part shuffle be identical; the
34842 variable form doesn't require that. */
34845 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
34847 rtx rperm[8], vperm;
34850 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
34853 /* We can only permute within the 128-bit lane. */
34854 for (i = 0; i < 8; ++i)
34856 unsigned e = d->perm[i];
34857 if (i < 4 ? e >= 4 : e < 4)
34864 for (i = 0; i < 8; ++i)
34866 unsigned e = d->perm[i];
34868 /* Within each 128-bit lane, the elements of op0 are numbered
34869 from 0 and the elements of op1 are numbered from 4. */
34875 rperm[i] = GEN_INT (e);
34878 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
34879 vperm = force_reg (V8SImode, vperm);
34880 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
34885 /* Return true if permutation D can be performed as VMODE permutation
34889 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
34891 unsigned int i, j, chunk;
34893 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
34894 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
34895 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
34898 if (GET_MODE_NUNITS (vmode) >= d->nelt)
34901 chunk = d->nelt / GET_MODE_NUNITS (vmode);
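/* Illustrative example: the V16QI permutation { 4, 5, 6, 7,
   0, 1, 2, 3, ... } moves only aligned 4-byte chunks, so it is also
   a valid V4SI permutation { 1, 0, ... }; the checks below verify
   that every chunk starts on a chunk boundary and is contiguous.  */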
34902 for (i = 0; i < d->nelt; i += chunk)
34903 if (d->perm[i] & (chunk - 1))
34906 for (j = 1; j < chunk; ++j)
34907 if (d->perm[i] + j != d->perm[i + j])
34913 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34914 in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128. */
34917 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
34919 unsigned i, nelt, eltsz, mask;
34920 unsigned char perm[32];
34921 enum machine_mode vmode = V16QImode;
34922 rtx rperm[32], vperm, target, op0, op1;
34926 if (d->op0 != d->op1)
34928 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
34931 && valid_perm_using_mode_p (V2TImode, d))
34936 /* Use vperm2i128 insn. The pattern uses
34937 V4DImode instead of V2TImode. */
34938 target = gen_lowpart (V4DImode, d->target);
34939 op0 = gen_lowpart (V4DImode, d->op0);
34940 op1 = gen_lowpart (V4DImode, d->op1);
34942 = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
| ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
34944 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
34952 if (GET_MODE_SIZE (d->vmode) == 16)
34957 else if (GET_MODE_SIZE (d->vmode) == 32)
/* V4DImode should already be handled through
   expand_vselect by the vpermq instruction.  */
34964 gcc_assert (d->vmode != V4DImode);
34967 if (d->vmode == V8SImode
34968 || d->vmode == V16HImode
34969 || d->vmode == V32QImode)
34971 /* First see if vpermq can be used for
34972 V8SImode/V16HImode/V32QImode. */
34973 if (valid_perm_using_mode_p (V4DImode, d))
34975 for (i = 0; i < 4; i++)
34976 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
34979 return expand_vselect (gen_lowpart (V4DImode, d->target),
34980 gen_lowpart (V4DImode, d->op0),
34984 /* Next see if vpermd can be used. */
34985 if (valid_perm_using_mode_p (V8SImode, d))
34989 if (vmode == V32QImode)
/* vpshufb only works intra-lane; it is not
   possible to shuffle bytes between the lanes.  */
34993 for (i = 0; i < nelt; ++i)
34994 if ((d->perm[i] ^ i) & (nelt / 2))
35005 if (vmode == V8SImode)
35006 for (i = 0; i < 8; ++i)
35007 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
35010 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
35011 if (d->op0 != d->op1)
35012 mask = 2 * nelt - 1;
35013 else if (vmode == V16QImode)
35016 mask = nelt / 2 - 1;
35018 for (i = 0; i < nelt; ++i)
35020 unsigned j, e = d->perm[i] & mask;
35021 for (j = 0; j < eltsz; ++j)
35022 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
35026 vperm = gen_rtx_CONST_VECTOR (vmode,
35027 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
35028 vperm = force_reg (vmode, vperm);
35030 target = gen_lowpart (vmode, d->target);
35031 op0 = gen_lowpart (vmode, d->op0);
35032 if (d->op0 == d->op1)
35034 if (vmode == V16QImode)
35035 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
35036 else if (vmode == V32QImode)
35037 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
35039 emit_insn (gen_avx2_permvarv8si (target, vperm, op0));
35043 op1 = gen_lowpart (vmode, d->op1);
35044 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
35050 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
35051 in a single instruction. */
35054 expand_vec_perm_1 (struct expand_vec_perm_d *d)
35056 unsigned i, nelt = d->nelt;
35057 unsigned char perm2[MAX_VECT_LEN];
35059 /* Check plain VEC_SELECT first, because AVX has instructions that could
35060 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
35061 input where SEL+CONCAT may not. */
35062 if (d->op0 == d->op1)
35064 int mask = nelt - 1;
35065 bool identity_perm = true;
35066 bool broadcast_perm = true;
35068 for (i = 0; i < nelt; i++)
35070 perm2[i] = d->perm[i] & mask;
35072 identity_perm = false;
35074 broadcast_perm = false;
35080 emit_move_insn (d->target, d->op0);
35083 else if (broadcast_perm && TARGET_AVX2)
35085 /* Use vpbroadcast{b,w,d}. */
35086 rtx op = d->op0, (*gen) (rtx, rtx) = NULL;
35090 op = gen_lowpart (V16QImode, op);
35091 gen = gen_avx2_pbroadcastv32qi;
35094 op = gen_lowpart (V8HImode, op);
35095 gen = gen_avx2_pbroadcastv16hi;
35098 op = gen_lowpart (V4SImode, op);
35099 gen = gen_avx2_pbroadcastv8si;
35102 gen = gen_avx2_pbroadcastv16qi;
35105 gen = gen_avx2_pbroadcastv8hi;
/* For other modes, prefer the other shuffles this function creates.  */
35113 emit_insn (gen (d->target, op));
35118 if (expand_vselect (d->target, d->op0, perm2, nelt))
35121 /* There are plenty of patterns in sse.md that are written for
35122 SEL+CONCAT and are not replicated for a single op. Perhaps
35123 that should be changed, to avoid the nastiness here. */
35125 /* Recognize interleave style patterns, which means incrementing
35126 every other permutation operand. */
35127 for (i = 0; i < nelt; i += 2)
35129 perm2[i] = d->perm[i] & mask;
35130 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
35132 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
35135 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
35138 for (i = 0; i < nelt; i += 4)
35140 perm2[i + 0] = d->perm[i + 0] & mask;
35141 perm2[i + 1] = d->perm[i + 1] & mask;
35142 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
35143 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
35146 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
35151 /* Finally, try the fully general two operand permute. */
35152 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
35155 /* Recognize interleave style patterns with reversed operands. */
35156 if (d->op0 != d->op1)
35158 for (i = 0; i < nelt; ++i)
35160 unsigned e = d->perm[i];
35168 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
35172 /* Try the SSE4.1 blend variable merge instructions. */
35173 if (expand_vec_perm_blend (d))
35176 /* Try one of the AVX vpermil variable permutations. */
35177 if (expand_vec_perm_vpermil (d))
35180 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
35181 vpshufb, vpermd or vpermq variable permutation. */
35182 if (expand_vec_perm_pshufb (d))
35188 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35189 in terms of a pair of pshuflw + pshufhw instructions. */
35192 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
35194 unsigned char perm2[MAX_VECT_LEN];
35198 if (d->vmode != V8HImode || d->op0 != d->op1)
35201 /* The two permutations only operate in 64-bit lanes. */
35202 for (i = 0; i < 4; ++i)
35203 if (d->perm[i] >= 4)
35205 for (i = 4; i < 8; ++i)
35206 if (d->perm[i] < 4)
35212 /* Emit the pshuflw. */
35213 memcpy (perm2, d->perm, 4);
35214 for (i = 4; i < 8; ++i)
35216 ok = expand_vselect (d->target, d->op0, perm2, 8);
35219 /* Emit the pshufhw. */
35220 memcpy (perm2 + 4, d->perm + 4, 4);
35221 for (i = 0; i < 4; ++i)
35223 ok = expand_vselect (d->target, d->target, perm2, 8);
35229 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
35230 the permutation using the SSSE3 palignr instruction. This succeeds
35231 when all of the elements in PERM fit within one vector and we merely
35232 need to shift them down so that a single vector permutation has a
35233 chance to succeed. */
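/* Illustrative example: if the indices used all lie in { 3, ..., 18 }
   for two V16QI operands, then min == 3, and a palignr by 3 bytes
   leaves a permutation whose indices all fall within { 0, ..., 15 },
   i.e. a single-operand shuffle that pshufb can then finish.  */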
35236 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
35238 unsigned i, nelt = d->nelt;
35243 /* Even with AVX, palignr only operates on 128-bit vectors. */
35244 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
35247 min = nelt, max = 0;
35248 for (i = 0; i < nelt; ++i)
35250 unsigned e = d->perm[i];
35256 if (min == 0 || max - min >= nelt)
35259 /* Given that we have SSSE3, we know we'll be able to implement the
35260 single operand permutation after the palignr with pshufb. */
35264 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
35265 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
35266 gen_lowpart (TImode, d->op1),
35267 gen_lowpart (TImode, d->op0), shift));
35269 d->op0 = d->op1 = d->target;
35272 for (i = 0; i < nelt; ++i)
35274 unsigned e = d->perm[i] - min;
35280 /* Test for the degenerate case where the alignment by itself
35281 produces the desired permutation. */
35285 ok = expand_vec_perm_1 (d);
35291 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
35292 a two vector permutation into a single vector permutation by using
35293 an interleave operation to merge the vectors. */
35296 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
35298 struct expand_vec_perm_d dremap, dfinal;
35299 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
35300 unsigned HOST_WIDE_INT contents;
35301 unsigned char remap[2 * MAX_VECT_LEN];
35303 bool ok, same_halves = false;
35305 if (GET_MODE_SIZE (d->vmode) == 16)
35307 if (d->op0 == d->op1)
35310 else if (GET_MODE_SIZE (d->vmode) == 32)
35314 /* For 32-byte modes allow even d->op0 == d->op1.
35315 The lack of cross-lane shuffling in some instructions
35316 might prevent a single insn shuffle. */
/* Examine whence the elements come.  */
35323 for (i = 0; i < nelt; ++i)
35324 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
35326 memset (remap, 0xff, sizeof (remap));
35329 if (GET_MODE_SIZE (d->vmode) == 16)
35331 unsigned HOST_WIDE_INT h1, h2, h3, h4;
35333 /* Split the two input vectors into 4 halves. */
35334 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
/* If the elements come from the low halves, use interleave low;
   similarly for interleave high.  If the elements are from
   mismatched halves, we can use shufps for V4SF/V4SI or do a
   DImode shuffle.  */
35342 if ((contents & (h1 | h3)) == contents)
35345 for (i = 0; i < nelt2; ++i)
35348 remap[i + nelt] = i * 2 + 1;
35349 dremap.perm[i * 2] = i;
35350 dremap.perm[i * 2 + 1] = i + nelt;
35353 else if ((contents & (h2 | h4)) == contents)
35356 for (i = 0; i < nelt2; ++i)
35358 remap[i + nelt2] = i * 2;
35359 remap[i + nelt + nelt2] = i * 2 + 1;
35360 dremap.perm[i * 2] = i + nelt2;
35361 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
35364 else if ((contents & (h1 | h4)) == contents)
35367 for (i = 0; i < nelt2; ++i)
35370 remap[i + nelt + nelt2] = i + nelt2;
35371 dremap.perm[i] = i;
35372 dremap.perm[i + nelt2] = i + nelt + nelt2;
35377 dremap.vmode = V2DImode;
35379 dremap.perm[0] = 0;
35380 dremap.perm[1] = 3;
35383 else if ((contents & (h2 | h3)) == contents)
35386 for (i = 0; i < nelt2; ++i)
35388 remap[i + nelt2] = i;
35389 remap[i + nelt] = i + nelt2;
35390 dremap.perm[i] = i + nelt2;
35391 dremap.perm[i + nelt2] = i + nelt;
35396 dremap.vmode = V2DImode;
35398 dremap.perm[0] = 1;
35399 dremap.perm[1] = 2;
35407 unsigned int nelt4 = nelt / 4, nzcnt = 0;
35408 unsigned HOST_WIDE_INT q[8];
35409 unsigned int nonzero_halves[4];
35411 /* Split the two input vectors into 8 quarters. */
35412 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
35413 for (i = 1; i < 8; ++i)
35414 q[i] = q[0] << (nelt4 * i);
35415 for (i = 0; i < 4; ++i)
35416 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
35418 nonzero_halves[nzcnt] = i;
35424 gcc_assert (d->op0 == d->op1);
35425 nonzero_halves[1] = nonzero_halves[0];
35426 same_halves = true;
35428 else if (d->op0 == d->op1)
35430 gcc_assert (nonzero_halves[0] == 0);
35431 gcc_assert (nonzero_halves[1] == 1);
35436 if (d->perm[0] / nelt2 == nonzero_halves[1])
/* Attempt to increase the likelihood that the dfinal
   shuffle will be intra-lane.  */
35440 char tmph = nonzero_halves[0];
35441 nonzero_halves[0] = nonzero_halves[1];
35442 nonzero_halves[1] = tmph;
35445 /* vperm2f128 or vperm2i128. */
35446 for (i = 0; i < nelt2; ++i)
35448 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
35449 remap[i + nonzero_halves[0] * nelt2] = i;
35450 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
35451 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
35454 if (d->vmode != V8SFmode
35455 && d->vmode != V4DFmode
35456 && d->vmode != V8SImode)
35458 dremap.vmode = V8SImode;
35460 for (i = 0; i < 4; ++i)
35462 dremap.perm[i] = i + nonzero_halves[0] * 4;
35463 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
35467 else if (d->op0 == d->op1)
35469 else if (TARGET_AVX2
35470 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
35473 for (i = 0; i < nelt4; ++i)
35476 remap[i + nelt] = i * 2 + 1;
35477 remap[i + nelt2] = i * 2 + nelt2;
35478 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
35479 dremap.perm[i * 2] = i;
35480 dremap.perm[i * 2 + 1] = i + nelt;
35481 dremap.perm[i * 2 + nelt2] = i + nelt2;
35482 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
35485 else if (TARGET_AVX2
35486 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
35489 for (i = 0; i < nelt4; ++i)
35491 remap[i + nelt4] = i * 2;
35492 remap[i + nelt + nelt4] = i * 2 + 1;
35493 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
35494 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
35495 dremap.perm[i * 2] = i + nelt4;
35496 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
35497 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
35498 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
35505 /* Use the remapping array set up above to move the elements from their
35506 swizzled locations into their final destinations. */
35508 for (i = 0; i < nelt; ++i)
35510 unsigned e = remap[d->perm[i]];
35511 gcc_assert (e < nelt);
35512 /* If same_halves is true, both halves of the remapped vector are the
35513 same. Avoid cross-lane accesses if possible. */
35514 if (same_halves && i >= nelt2)
35516 gcc_assert (e < nelt2);
35517 dfinal.perm[i] = e + nelt2;
35520 dfinal.perm[i] = e;
35522 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
35523 dfinal.op1 = dfinal.op0;
35524 dremap.target = dfinal.op0;
35526 /* Test if the final remap can be done with a single insn. For V4SFmode or
35527 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
35529 ok = expand_vec_perm_1 (&dfinal);
35530 seq = get_insns ();
35539 if (dremap.vmode != dfinal.vmode)
35541 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
35542 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
35543 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
35546 ok = expand_vec_perm_1 (&dremap);
35553 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
35554 a single vector cross-lane permutation into vpermq followed
35555 by any of the single insn permutations. */
35558 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
35560 struct expand_vec_perm_d dremap, dfinal;
35561 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
35562 unsigned contents[2];
35566 && (d->vmode == V32QImode || d->vmode == V16HImode)
35567 && d->op0 == d->op1))
35572 for (i = 0; i < nelt2; ++i)
35574 contents[0] |= 1u << (d->perm[i] / nelt4);
35575 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
35578 for (i = 0; i < 2; ++i)
35580 unsigned int cnt = 0;
35581 for (j = 0; j < 4; ++j)
35582 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
35590 dremap.vmode = V4DImode;
35592 dremap.target = gen_reg_rtx (V4DImode);
35593 dremap.op0 = gen_lowpart (V4DImode, d->op0);
35594 dremap.op1 = dremap.op0;
35595 for (i = 0; i < 2; ++i)
35597 unsigned int cnt = 0;
35598 for (j = 0; j < 4; ++j)
35599 if ((contents[i] & (1u << j)) != 0)
35600 dremap.perm[2 * i + cnt++] = j;
35601 for (; cnt < 2; ++cnt)
35602 dremap.perm[2 * i + cnt] = 0;
35606 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
35607 dfinal.op1 = dfinal.op0;
35608 for (i = 0, j = 0; i < nelt; ++i)
35612 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
35613 if ((d->perm[i] / nelt4) == dremap.perm[j])
35615 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
35616 dfinal.perm[i] |= nelt4;
35618 gcc_unreachable ();
35621 ok = expand_vec_perm_1 (&dremap);
35624 ok = expand_vec_perm_1 (&dfinal);
35630 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
35631 a two vector permutation using 2 intra-lane interleave insns
35632 and cross-lane shuffle for 32-byte vectors. */
35635 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
35638 rtx (*gen) (rtx, rtx, rtx);
35640 if (d->op0 == d->op1)
35642 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
35644 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
35650 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
35652 for (i = 0; i < nelt; i += 2)
35653 if (d->perm[i] != d->perm[0] + i / 2
35654 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
35664 gen = gen_vec_interleave_highv32qi;
35666 gen = gen_vec_interleave_lowv32qi;
35670 gen = gen_vec_interleave_highv16hi;
35672 gen = gen_vec_interleave_lowv16hi;
35676 gen = gen_vec_interleave_highv8si;
35678 gen = gen_vec_interleave_lowv8si;
35682 gen = gen_vec_interleave_highv4di;
35684 gen = gen_vec_interleave_lowv4di;
35688 gen = gen_vec_interleave_highv8sf;
35690 gen = gen_vec_interleave_lowv8sf;
35694 gen = gen_vec_interleave_highv4df;
35696 gen = gen_vec_interleave_lowv4df;
35699 gcc_unreachable ();
35702 emit_insn (gen (d->target, d->op0, d->op1));
35706 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
35707 permutation with two pshufb insns and an ior. We should have already
35708 failed all two instruction sequences. */
35711 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
35713 rtx rperm[2][16], vperm, l, h, op, m128;
35714 unsigned int i, nelt, eltsz;
35716 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
35718 gcc_assert (d->op0 != d->op1);
35721 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
35723 /* Generate two permutation masks. If the required element is within
35724 the given vector it is shuffled into the proper lane. If the required
35725 element is in the other vector, force a zero into the lane by setting
35726 bit 7 in the permutation mask. */
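/* Illustrative V16QI example: for perm = { 0, 17, 2, 19, ... } the
   op0 mask starts { 0, -128, 2, -128, ... } and the op1 mask starts
   { -128, 1, -128, 3, ... }; each pshufb zeroes the lanes wanted
   from the other operand, so the final por merges the two.  */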
35727 m128 = GEN_INT (-128);
35728 for (i = 0; i < nelt; ++i)
35730 unsigned j, e = d->perm[i];
35731 unsigned which = (e >= nelt);
35735 for (j = 0; j < eltsz; ++j)
35737 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
35738 rperm[1-which][i*eltsz + j] = m128;
35742 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
35743 vperm = force_reg (V16QImode, vperm);
35745 l = gen_reg_rtx (V16QImode);
35746 op = gen_lowpart (V16QImode, d->op0);
35747 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
35749 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
35750 vperm = force_reg (V16QImode, vperm);
35752 h = gen_reg_rtx (V16QImode);
35753 op = gen_lowpart (V16QImode, d->op1);
35754 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
35756 op = gen_lowpart (V16QImode, d->target);
35757 emit_insn (gen_iorv16qi3 (op, l, h));
/* Implement arbitrary permutation of one V32QImode or V16HImode operand
35763 with two vpshufb insns, vpermq and vpor. We should have already failed
35764 all two or three instruction sequences. */
35767 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
35769 rtx rperm[2][32], vperm, l, h, hp, op, m128;
35770 unsigned int i, nelt, eltsz;
35773 || d->op0 != d->op1
35774 || (d->vmode != V32QImode && d->vmode != V16HImode))
35781 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
/* Generate two permutation masks.  If the required element is within
   the same lane, it is shuffled in.  If the required element is from
   the other lane, force a zero by setting bit 7 in the permutation
   mask.  In the other mask, an element is non-negative if it is
   requested from the other lane, but it is also moved to the other
   lane, so that the result of vpshufb can have its two V2TImode
   halves swapped.  */
35790 m128 = GEN_INT (-128);
35791 for (i = 0; i < nelt; ++i)
35793 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
35794 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
35796 for (j = 0; j < eltsz; ++j)
35798 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
35799 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
35803 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
35804 vperm = force_reg (V32QImode, vperm);
35806 h = gen_reg_rtx (V32QImode);
35807 op = gen_lowpart (V32QImode, d->op0);
35808 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
/* Swap the 128-bit lanes of h into hp.  */
35811 hp = gen_reg_rtx (V4DImode);
35812 op = gen_lowpart (V4DImode, h);
35813 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
35816 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
35817 vperm = force_reg (V32QImode, vperm);
35819 l = gen_reg_rtx (V32QImode);
35820 op = gen_lowpart (V32QImode, d->op0);
35821 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
35823 op = gen_lowpart (V32QImode, d->target);
35824 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
35829 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
and extract-odd permutations of two V32QImode or V16HImode operands
35831 with two vpshufb insns, vpor and vpermq. We should have already
35832 failed all two or three instruction sequences. */
35835 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
35837 rtx rperm[2][32], vperm, l, h, ior, op, m128;
35838 unsigned int i, nelt, eltsz;
35841 || d->op0 == d->op1
35842 || (d->vmode != V32QImode && d->vmode != V16HImode))
35845 for (i = 0; i < d->nelt; ++i)
35846 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
35853 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
35855 /* Generate two permutation masks. In the first permutation mask
35856 the first quarter will contain indexes for the first half
35857 of the op0, the second quarter will contain bit 7 set, third quarter
35858 will contain indexes for the second half of the op0 and the
35859 last quarter bit 7 set. In the second permutation mask
35860 the first quarter will contain bit 7 set, the second quarter
35861 indexes for the first half of the op1, the third quarter bit 7 set
35862 and last quarter indexes for the second half of the op1.
35863 I.e. the first mask e.g. for V32QImode extract even will be:
35864 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
35865 (all values masked with 0xf except for -128) and second mask
35866 for extract even will be
35867 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
35868 m128 = GEN_INT (-128);
35869 for (i = 0; i < nelt; ++i)
35871 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
35872 unsigned which = d->perm[i] >= nelt;
35873 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
35875 for (j = 0; j < eltsz; ++j)
35877 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
35878 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
35882 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
35883 vperm = force_reg (V32QImode, vperm);
35885 l = gen_reg_rtx (V32QImode);
35886 op = gen_lowpart (V32QImode, d->op0);
35887 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
35889 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
35890 vperm = force_reg (V32QImode, vperm);
35892 h = gen_reg_rtx (V32QImode);
35893 op = gen_lowpart (V32QImode, d->op1);
35894 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
35896 ior = gen_reg_rtx (V32QImode);
35897 emit_insn (gen_iorv32qi3 (ior, l, h));
/* Permute the V4DImode quarters using the { 0, 2, 1, 3 } permutation.  */
35900 op = gen_lowpart (V4DImode, d->target);
35901 ior = gen_lowpart (V4DImode, ior);
35902 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
35903 const1_rtx, GEN_INT (3)));
35908 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
35909 and extract-odd permutations. */
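/* That is, the permutation { 0, 2, 4, ..., 2*nelt-2 } for extract-even
   and { 1, 3, 5, ..., 2*nelt-1 } for extract-odd, taking every other
   element of the two concatenated input vectors.  */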
35912 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
35919 t1 = gen_reg_rtx (V4DFmode);
35920 t2 = gen_reg_rtx (V4DFmode);
35922 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
35923 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
35924 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
35926 /* Now an unpck[lh]pd will produce the result required. */
35928 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
35930 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
35936 int mask = odd ? 0xdd : 0x88;
35938 t1 = gen_reg_rtx (V8SFmode);
35939 t2 = gen_reg_rtx (V8SFmode);
35940 t3 = gen_reg_rtx (V8SFmode);
35942 /* Shuffle within the 128-bit lanes to produce:
35943 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
35944 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
35947 /* Shuffle the lanes around to produce:
35948 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
35949 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
35952 /* Shuffle within the 128-bit lanes to produce:
35953 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
35954 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
35956 /* Shuffle within the 128-bit lanes to produce:
35957 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
35958 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
35960 /* Shuffle the lanes around to produce:
35961 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
35962 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
35971 /* These are always directly implementable by expand_vec_perm_1. */
35972 gcc_unreachable ();
35976 return expand_vec_perm_pshufb2 (d);
35979 /* We need 2*log2(N)-1 operations to achieve odd/even
35980 with interleave. */
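/* For V8HImode that is 2*log2(8)-1 = 5 interleave insns, emitted below;
   the V16QImode case further down needs 2*log2(16)-1 = 7.  */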
35981 t1 = gen_reg_rtx (V8HImode);
35982 t2 = gen_reg_rtx (V8HImode);
35983 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
35984 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
35985 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
35986 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
35988 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
35990 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
35997 return expand_vec_perm_pshufb2 (d);
36000 t1 = gen_reg_rtx (V16QImode);
36001 t2 = gen_reg_rtx (V16QImode);
36002 t3 = gen_reg_rtx (V16QImode);
36003 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
36004 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
36005 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
36006 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
36007 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
36008 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
36010 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
36012 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
36019 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
36022 t1 = gen_reg_rtx (V4DImode);
36023 t2 = gen_reg_rtx (V4DImode);
36025 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
36026 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
36027 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
36029 /* Now a vpunpck[lh]qdq will produce the result required. */
36031 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
36033 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
36038 t1 = gen_reg_rtx (V8SImode);
36039 t2 = gen_reg_rtx (V8SImode);
36041 /* Shuffle the lanes around into
36042 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
36043 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
36044 gen_lowpart (V4DImode, d->op0),
36045 gen_lowpart (V4DImode, d->op1),
36047 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
36048 gen_lowpart (V4DImode, d->op0),
36049 gen_lowpart (V4DImode, d->op1),
36052 /* Swap the 2nd and 3rd position in each lane into
36053 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
36054 emit_insn (gen_avx2_pshufdv3 (t1, t1,
36055 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
36056 emit_insn (gen_avx2_pshufdv3 (t2, t2,
36057 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
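/* The pshufd selector is four 2-bit fields: 2*4 + 1*16 + 3*64 = 0xd8,
   which encodes the element order { 0, 2, 1, 3 } within each lane.  */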
36059 /* Now a vpunpck[lh]qdq will produce
36060 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
36062 t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
36063 gen_lowpart (V4DImode, t1),
36064 gen_lowpart (V4DImode, t2));
36066 t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
36067 gen_lowpart (V4DImode, t1),
36068 gen_lowpart (V4DImode, t2));
36073 gcc_unreachable ();
36079 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
36080 extract-even and extract-odd permutations. */
36083 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
36085 unsigned i, odd, nelt = d->nelt;
36088 if (odd != 0 && odd != 1)
36091 for (i = 1; i < nelt; ++i)
36092 if (d->perm[i] != 2 * i + odd)
36095 return expand_vec_perm_even_odd_1 (d, odd);
36098 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
36099 permutations. We assume that expand_vec_perm_1 has already failed. */
36102 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
36104 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
36105 enum machine_mode vmode = d->vmode;
36106 unsigned char perm2[4];
36114 /* These are special-cased in sse.md so that we can optionally
36115 use the vbroadcast instruction. They expand to two insns
36116 if the input happens to be in a register. */
36117 gcc_unreachable ();
36123 /* These are always implementable using standard shuffle patterns. */
36124 gcc_unreachable ();
36128 /* These can be implemented via interleave. We save one insn by
36129 stopping once we have promoted to V4SImode and then use pshufd. */
36132 optab otab = vec_interleave_low_optab;
36136 otab = vec_interleave_high_optab;
36141 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
36142 vmode = get_mode_wider_vector (vmode);
36143 op0 = gen_lowpart (vmode, op0);
36145 while (vmode != V4SImode);
36147 memset (perm2, elt, 4);
36148 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
36156 /* For AVX2 broadcasts of the first element vpbroadcast* or
36157 vpermq should be used by expand_vec_perm_1. */
36158 gcc_assert (!TARGET_AVX2 || d->perm[0]);
36162 gcc_unreachable ();
36166 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
36167 broadcast permutations. */
36170 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
36172 unsigned i, elt, nelt = d->nelt;
36174 if (d->op0 != d->op1)
36178 for (i = 1; i < nelt; ++i)
36179 if (d->perm[i] != elt)
36182 return expand_vec_perm_broadcast_1 (d);
36185 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
36186 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
36187 all the shorter instruction sequences. */
36190 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
36192 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
36193 unsigned int i, nelt, eltsz;
36197 || d->op0 == d->op1
36198 || (d->vmode != V32QImode && d->vmode != V16HImode))
36205 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36207 /* Generate 4 permutation masks. If the required element is within
36208 the same lane, it is shuffled in. If the required element is from the
36209 other lane, force a zero by setting bit 7 in the permutation mask.
36210 The companion mask has non-negative elements only where the element
36211 is requested from the other lane, but also moved to the other lane,
36212 so that the result of vpshufb can have the two V2TImode halves
36214 m128 = GEN_INT (-128);
36215 for (i = 0; i < 32; ++i)
36217 rperm[0][i] = m128;
36218 rperm[1][i] = m128;
36219 rperm[2][i] = m128;
36220 rperm[3][i] = m128;
36226 for (i = 0; i < nelt; ++i)
36228 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
36229 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
36230 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
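/* WHICH indexes the four masks: bit 1 is set when the element comes
   from d->op1 rather than d->op0, bit 0 when it has to cross to the
   other 128-bit lane. Thus rperm[0]/rperm[2] are the same-lane masks
   for op0/op1 and rperm[1]/rperm[3] the cross-lane ones.  */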
36232 for (j = 0; j < eltsz; ++j)
36233 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
36234 used[which] = true;
36237 for (i = 0; i < 2; ++i)
36239 if (!used[2 * i + 1])
36244 vperm = gen_rtx_CONST_VECTOR (V32QImode,
36245 gen_rtvec_v (32, rperm[2 * i + 1]));
36246 vperm = force_reg (V32QImode, vperm);
36247 h[i] = gen_reg_rtx (V32QImode);
36248 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
36249 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
36252 /* Swap the two 128-bit lanes of h[X]. */
36253 for (i = 0; i < 2; ++i)
36255 if (h[i] == NULL_RTX)
36257 op = gen_reg_rtx (V4DImode);
36258 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
36259 const2_rtx, GEN_INT (3), const0_rtx,
36261 h[i] = gen_lowpart (V32QImode, op);
36264 for (i = 0; i < 2; ++i)
36271 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
36272 vperm = force_reg (V32QImode, vperm);
36273 l[i] = gen_reg_rtx (V32QImode);
36274 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
36275 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
36278 for (i = 0; i < 2; ++i)
36282 op = gen_reg_rtx (V32QImode);
36283 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
36290 gcc_assert (l[0] && l[1]);
36291 op = gen_lowpart (V32QImode, d->target);
36292 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
36296 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
36297 With all of the interface bits taken care of, perform the expansion
36298 in D and return true on success. */
36301 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
36303 /* Try a single instruction expansion. */
36304 if (expand_vec_perm_1 (d))
36307 /* Try sequences of two instructions. */
36309 if (expand_vec_perm_pshuflw_pshufhw (d))
36312 if (expand_vec_perm_palignr (d))
36315 if (expand_vec_perm_interleave2 (d))
36318 if (expand_vec_perm_broadcast (d))
36321 if (expand_vec_perm_vpermq_perm_1 (d))
36324 /* Try sequences of three instructions. */
36326 if (expand_vec_perm_pshufb2 (d))
36329 if (expand_vec_perm_interleave3 (d))
36332 /* Try sequences of four instructions. */
36334 if (expand_vec_perm_vpshufb2_vpermq (d))
36337 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
36340 /* ??? Look for narrow permutations whose element orderings would
36341 allow the promotion to a wider mode. */
36343 /* ??? Look for sequences of interleave or a wider permute that place
36344 the data into the correct lanes for a half-vector shuffle like
36345 pshuf[lh]w or vpermilps. */
36347 /* ??? Look for sequences of interleave that produce the desired results.
36348 The combinatorics of punpck[lh] get pretty ugly... */
36350 if (expand_vec_perm_even_odd (d))
36353 /* Even longer sequences. */
36354 if (expand_vec_perm_vpshufb4_vpermq2 (d))
36361 ix86_expand_vec_perm_const (rtx operands[4])
36363 struct expand_vec_perm_d d;
36364 unsigned char perm[MAX_VECT_LEN];
36365 int i, nelt, which;
36368 d.target = operands[0];
36369 d.op0 = operands[1];
36370 d.op1 = operands[2];
36373 d.vmode = GET_MODE (d.target);
36374 gcc_assert (VECTOR_MODE_P (d.vmode));
36375 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
36376 d.testing_p = false;
36378 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
36379 gcc_assert (XVECLEN (sel, 0) == nelt);
36380 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
36382 for (i = which = 0; i < nelt; ++i)
36384 rtx e = XVECEXP (sel, 0, i);
36385 int ei = INTVAL (e) & (2 * nelt - 1);
36387 which |= (ei < nelt ? 1 : 2);
36398 if (!rtx_equal_p (d.op0, d.op1))
36401 /* The elements of PERM do not suggest that only the first operand
36402 is used, but both operands are identical. Allow easier matching
36403 of the permutation by folding the permutation into the single
36405 for (i = 0; i < nelt; ++i)
36406 if (d.perm[i] >= nelt)
36415 for (i = 0; i < nelt; ++i)
36421 if (ix86_expand_vec_perm_const_1 (&d))
36424 /* If the mask says both arguments are needed, but they are the same,
36425 the above tried to expand with d.op0 == d.op1. If that didn't work,
36426 retry with d.op0 != d.op1 as that is what testing has been done with. */
36427 if (which == 3 && d.op0 == d.op1)
36432 memcpy (d.perm, perm, sizeof (perm));
36433 d.op1 = gen_reg_rtx (d.vmode);
36435 ok = ix86_expand_vec_perm_const_1 (&d);
36436 seq = get_insns ();
36440 emit_move_insn (d.op1, d.op0);
36449 /* Implement targetm.vectorize.vec_perm_const_ok. */
36452 ix86_vectorize_vec_perm_const_ok (tree vec_type, tree mask)
36454 struct expand_vec_perm_d d;
36455 unsigned int i, nelt, which;
36459 d.vmode = TYPE_MODE (vec_type);
36460 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
36461 d.testing_p = true;
36463 /* Given sufficient ISA support we can just return true here
36464 for selected vector modes. */
36465 if (GET_MODE_SIZE (d.vmode) == 16)
36467 /* All implementable with a single vpperm insn. */
36470 /* All implementable with 2 pshufb + 1 ior. */
36473 /* All implementable with shufpd or unpck[lh]pd. */
36478 /* Extract the values from the vector CST into the permutation
36480 list = TREE_VECTOR_CST_ELTS (mask);
36481 for (i = which = 0; i < nelt; ++i, list = TREE_CHAIN (list))
36483 unsigned HOST_WIDE_INT e;
36485 gcc_checking_assert (host_integerp (TREE_VALUE (list), 1));
36486 e = tree_low_cst (TREE_VALUE (list), 1);
36487 gcc_assert (e < 2 * nelt);
36489 which |= (e < nelt ? 1 : 2);
36492 gcc_assert (list == NULL);
36494 /* For all elements from the second vector, fold the elements into the first. */
36496 for (i = 0; i < nelt; ++i)
36499 /* Check whether the mask can be applied to the vector type. */
36500 one_vec = (which != 3);
36502 /* Implementable with shufps or pshufd. */
36503 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
36506 /* Otherwise we have to go through the motions and see if we can
36507 figure out how to generate the requested permutation. */
36508 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
36509 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
36511 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
36514 ret = ix86_expand_vec_perm_const_1 (&d);
36521 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
36523 struct expand_vec_perm_d d;
36529 d.vmode = GET_MODE (targ);
36530 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
36531 d.testing_p = false;
36533 for (i = 0; i < nelt; ++i)
36534 d.perm[i] = i * 2 + odd;
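/* E.g. for a V8SImode target with ODD == 0 this builds the permutation
   { 0, 2, 4, 6, 8, 10, 12, 14 }, the even elements of the OP0/OP1
   concatenation.  */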
36536 /* We'll either be able to implement the permutation directly... */
36537 if (expand_vec_perm_1 (&d))
36540 /* ... or we use the special-case patterns. */
36541 expand_vec_perm_even_odd_1 (&d, odd);
36544 /* Expand an insert into a vector register through pinsr insn.
36545 Return true if successful. */
36548 ix86_expand_pinsr (rtx *operands)
36550 rtx dst = operands[0];
36551 rtx src = operands[3];
36553 unsigned int size = INTVAL (operands[1]);
36554 unsigned int pos = INTVAL (operands[2]);
36556 if (GET_CODE (dst) == SUBREG)
36558 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
36559 dst = SUBREG_REG (dst);
36562 if (GET_CODE (src) == SUBREG)
36563 src = SUBREG_REG (src);
36565 switch (GET_MODE (dst))
36572 enum machine_mode srcmode, dstmode;
36573 rtx (*pinsr)(rtx, rtx, rtx, rtx);
36575 srcmode = mode_for_size (size, MODE_INT, 0);
36580 if (!TARGET_SSE4_1)
36582 dstmode = V16QImode;
36583 pinsr = gen_sse4_1_pinsrb;
36589 dstmode = V8HImode;
36590 pinsr = gen_sse2_pinsrw;
36594 if (!TARGET_SSE4_1)
36596 dstmode = V4SImode;
36597 pinsr = gen_sse4_1_pinsrd;
36601 gcc_assert (TARGET_64BIT);
36602 if (!TARGET_SSE4_1)
36604 dstmode = V2DImode;
36605 pinsr = gen_sse4_1_pinsrq;
36612 dst = gen_lowpart (dstmode, dst);
36613 src = gen_lowpart (srcmode, src);
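/* The pinsr patterns are represented as a vec_merge whose selector
   immediate is a one-hot element mask, which is why a 1 shifted by the
   element position is passed below rather than the position itself.  */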
36617 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
36626 /* This function returns the calling-ABI-specific va_list type node.
36627 It returns the va_list type specific to FNDECL. */
36630 ix86_fn_abi_va_list (tree fndecl)
36633 return va_list_type_node;
36634 gcc_assert (fndecl != NULL_TREE);
36636 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
36637 return ms_va_list_type_node;
36639 return sysv_va_list_type_node;
36642 /* Returns the canonical va_list type specified by TYPE. If there
36643 is no valid TYPE provided, it returns NULL_TREE. */
36646 ix86_canonical_va_list_type (tree type)
36650 /* Resolve references and pointers to va_list type. */
36651 if (TREE_CODE (type) == MEM_REF)
36652 type = TREE_TYPE (type);
36653 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
36654 type = TREE_TYPE (type);
36655 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
36656 type = TREE_TYPE (type);
36658 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
36660 wtype = va_list_type_node;
36661 gcc_assert (wtype != NULL_TREE);
36663 if (TREE_CODE (wtype) == ARRAY_TYPE)
36665 /* If va_list is an array type, the argument may have decayed
36666 to a pointer type, e.g. by being passed to another function.
36667 In that case, unwrap both types so that we can compare the
36668 underlying records. */
36669 if (TREE_CODE (htype) == ARRAY_TYPE
36670 || POINTER_TYPE_P (htype))
36672 wtype = TREE_TYPE (wtype);
36673 htype = TREE_TYPE (htype);
36676 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
36677 return va_list_type_node;
36678 wtype = sysv_va_list_type_node;
36679 gcc_assert (wtype != NULL_TREE);
36681 if (TREE_CODE (wtype) == ARRAY_TYPE)
36683 /* If va_list is an array type, the argument may have decayed
36684 to a pointer type, e.g. by being passed to another function.
36685 In that case, unwrap both types so that we can compare the
36686 underlying records. */
36687 if (TREE_CODE (htype) == ARRAY_TYPE
36688 || POINTER_TYPE_P (htype))
36690 wtype = TREE_TYPE (wtype);
36691 htype = TREE_TYPE (htype);
36694 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
36695 return sysv_va_list_type_node;
36696 wtype = ms_va_list_type_node;
36697 gcc_assert (wtype != NULL_TREE);
36699 if (TREE_CODE (wtype) == ARRAY_TYPE)
36701 /* If va_list is an array type, the argument may have decayed
36702 to a pointer type, e.g. by being passed to another function.
36703 In that case, unwrap both types so that we can compare the
36704 underlying records. */
36705 if (TREE_CODE (htype) == ARRAY_TYPE
36706 || POINTER_TYPE_P (htype))
36708 wtype = TREE_TYPE (wtype);
36709 htype = TREE_TYPE (htype);
36712 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
36713 return ms_va_list_type_node;
36716 return std_canonical_va_list_type (type);
36719 /* Iterate through the target-specific builtin types for va_list.
36720 IDX denotes the iterator, *PTREE is set to the result type of
36721 the va_list builtin, and *PNAME to its internal type.
36722 Returns zero if there is no element for this index, otherwise
36723 IDX should be increased upon the next call.
36724 Note, do not iterate a base builtin's name like __builtin_va_list.
36725 Used from c_common_nodes_and_builtins. */
36728 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
36738 *ptree = ms_va_list_type_node;
36739 *pname = "__builtin_ms_va_list";
36743 *ptree = sysv_va_list_type_node;
36744 *pname = "__builtin_sysv_va_list";
36752 #undef TARGET_SCHED_DISPATCH
36753 #define TARGET_SCHED_DISPATCH has_dispatch
36754 #undef TARGET_SCHED_DISPATCH_DO
36755 #define TARGET_SCHED_DISPATCH_DO do_dispatch
36756 #undef TARGET_SCHED_REASSOCIATION_WIDTH
36757 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
36759 /* The size of the dispatch window is the total number of bytes of
36760 object code allowed in a window. */
36761 #define DISPATCH_WINDOW_SIZE 16
36763 /* Number of dispatch windows considered for scheduling. */
36764 #define MAX_DISPATCH_WINDOWS 3
36766 /* Maximum number of instructions in a window. */
36769 /* Maximum number of immediate operands in a window. */
36772 /* Maximum number of immediate bits allowed in a window. */
36773 #define MAX_IMM_SIZE 128
36775 /* Maximum number of 32 bit immediates allowed in a window. */
36776 #define MAX_IMM_32 4
36778 /* Maximum number of 64 bit immediates allowed in a window. */
36779 #define MAX_IMM_64 2
36781 /* Maximum total of loads or prefetches allowed in a window. */
36784 /* Maximum total of stores allowed in a window. */
36785 #define MAX_STORE 1
36791 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
36792 enum dispatch_group {
36807 /* Number of allowable groups in a dispatch window. It is an array
36808 indexed by dispatch_group enum. 100 is used as a big number,
36809 because the number of these kinds of operations does not have any
36810 effect in a dispatch window, but we need them for other reasons in
36812 static unsigned int num_allowable_groups[disp_last] = {
36813 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
36816 char group_name[disp_last + 1][16] = {
36817 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
36818 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
36819 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
36822 /* Instruction path. */
36825 path_single, /* Single micro op. */
36826 path_double, /* Double micro op. */
36827 path_multi, /* Instructions with more than 2 micro ops. */
36831 /* sched_insn_info defines a window over the instructions scheduled in
36832 the basic block. It contains a pointer to the insn_info table and
36833 the instruction scheduled.
36835 Windows are allocated for each basic block and are linked
36837 typedef struct sched_insn_info_s {
36839 enum dispatch_group group;
36840 enum insn_path path;
36845 /* Linked list of dispatch windows. This is a two-way list of
36846 dispatch windows of a basic block. It contains information about
36847 the number of uops in the window and the total number of
36848 instructions and of bytes in the object code for this dispatch
36850 typedef struct dispatch_windows_s {
36851 int num_insn; /* Number of insns in the window. */
36852 int num_uops; /* Number of uops in the window. */
36853 int window_size; /* Number of bytes in the window. */
36854 int window_num; /* Window number, either 0 or 1. */
36855 int num_imm; /* Number of immediates in an insn. */
36856 int num_imm_32; /* Number of 32 bit immediates in an insn. */
36857 int num_imm_64; /* Number of 64 bit immediates in an insn. */
36858 int imm_size; /* Total size in bytes of immediates in the window. */
36859 int num_loads; /* Total memory loads in the window. */
36860 int num_stores; /* Total memory stores in the window. */
36861 int violation; /* Violation exists in window. */
36862 sched_insn_info *window; /* Pointer to the window. */
36863 struct dispatch_windows_s *next;
36864 struct dispatch_windows_s *prev;
36865 } dispatch_windows;
36867 /* Immediate values used in an insn. */
36868 typedef struct imm_info_s
36875 static dispatch_windows *dispatch_window_list;
36876 static dispatch_windows *dispatch_window_list1;
36878 /* Get dispatch group of insn. */
36880 static enum dispatch_group
36881 get_mem_group (rtx insn)
36883 enum attr_memory memory;
36885 if (INSN_CODE (insn) < 0)
36886 return disp_no_group;
36887 memory = get_attr_memory (insn);
36888 if (memory == MEMORY_STORE)
36891 if (memory == MEMORY_LOAD)
36894 if (memory == MEMORY_BOTH)
36895 return disp_load_store;
36897 return disp_no_group;
36900 /* Return true if insn is a compare instruction. */
36905 enum attr_type type;
36907 type = get_attr_type (insn);
36908 return (type == TYPE_TEST
36909 || type == TYPE_ICMP
36910 || type == TYPE_FCMP
36911 || GET_CODE (PATTERN (insn)) == COMPARE);
36914 /* Return true if a dispatch violation was encountered. */
36917 dispatch_violation (void)
36919 if (dispatch_window_list->next)
36920 return dispatch_window_list->next->violation;
36921 return dispatch_window_list->violation;
36924 /* Return true if insn is a branch instruction. */
36927 is_branch (rtx insn)
36929 return (CALL_P (insn) || JUMP_P (insn));
36932 /* Return true if insn is a prefetch instruction. */
36935 is_prefetch (rtx insn)
36937 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
36940 /* This function initializes a dispatch window and the list container holding a
36941 pointer to the window. */
36944 init_window (int window_num)
36947 dispatch_windows *new_list;
36949 if (window_num == 0)
36950 new_list = dispatch_window_list;
36952 new_list = dispatch_window_list1;
36954 new_list->num_insn = 0;
36955 new_list->num_uops = 0;
36956 new_list->window_size = 0;
36957 new_list->next = NULL;
36958 new_list->prev = NULL;
36959 new_list->window_num = window_num;
36960 new_list->num_imm = 0;
36961 new_list->num_imm_32 = 0;
36962 new_list->num_imm_64 = 0;
36963 new_list->imm_size = 0;
36964 new_list->num_loads = 0;
36965 new_list->num_stores = 0;
36966 new_list->violation = false;
36968 for (i = 0; i < MAX_INSN; i++)
36970 new_list->window[i].insn = NULL;
36971 new_list->window[i].group = disp_no_group;
36972 new_list->window[i].path = no_path;
36973 new_list->window[i].byte_len = 0;
36974 new_list->window[i].imm_bytes = 0;
36979 /* This function allocates and initializes a dispatch window and the
36980 list container holding a pointer to the window. */
36982 static dispatch_windows *
36983 allocate_window (void)
36985 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
36986 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
36991 /* This routine initializes the dispatch scheduling information. It
36992 initiates building dispatch scheduler tables and constructs the
36993 first dispatch window. */
36996 init_dispatch_sched (void)
36998 /* Allocate a dispatch list and a window. */
36999 dispatch_window_list = allocate_window ();
37000 dispatch_window_list1 = allocate_window ();
37005 /* This function returns true if a branch is detected. End of a basic block
37006 does not have to be a branch, but here we assume only branches end a
37010 is_end_basic_block (enum dispatch_group group)
37012 return group == disp_branch;
37015 /* This function is called when the end of a window processing is reached. */
37018 process_end_window (void)
37020 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
37021 if (dispatch_window_list->next)
37023 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
37024 gcc_assert (dispatch_window_list->window_size
37025 + dispatch_window_list1->window_size <= 48);
37031 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
37032 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
37033 for 48 bytes of instructions. Note that these windows are not dispatch
37034 windows whose sizes are DISPATCH_WINDOW_SIZE. */
37036 static dispatch_windows *
37037 allocate_next_window (int window_num)
37039 if (window_num == 0)
37041 if (dispatch_window_list->next)
37044 return dispatch_window_list;
37047 dispatch_window_list->next = dispatch_window_list1;
37048 dispatch_window_list1->prev = dispatch_window_list;
37050 return dispatch_window_list1;
37053 /* Increment the number of immediate operands of an instruction. */
37056 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
37061 switch ( GET_CODE (*in_rtx))
37066 (imm_values->imm)++;
37067 if (x86_64_immediate_operand (*in_rtx, SImode))
37068 (imm_values->imm32)++;
37070 (imm_values->imm64)++;
37074 (imm_values->imm)++;
37075 (imm_values->imm64)++;
37079 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
37081 (imm_values->imm)++;
37082 (imm_values->imm32)++;
37093 /* Compute number of immediate operands of an instruction. */
37096 find_constant (rtx in_rtx, imm_info *imm_values)
37098 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
37099 (rtx_function) find_constant_1, (void *) imm_values);
37102 /* Return total size of immediate operands of an instruction along with number
37103 of corresponding immediate-operands. It initializes its parameters to zero
37104 before calling FIND_CONSTANT.
37105 INSN is the input instruction. IMM is the total of immediates.
37106 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
37110 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
37112 imm_info imm_values = {0, 0, 0};
37114 find_constant (insn, &imm_values);
37115 *imm = imm_values.imm;
37116 *imm32 = imm_values.imm32;
37117 *imm64 = imm_values.imm64;
37118 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
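/* For example, an insn with one 32-bit and one 64-bit immediate yields
   *IMM = 2, *IMM32 = 1, *IMM64 = 1 and a return value of 4 + 8 = 12.  */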
37121 /* This function indicates if an operand of an instruction is an
37125 has_immediate (rtx insn)
37127 int num_imm_operand;
37128 int num_imm32_operand;
37129 int num_imm64_operand;
37132 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
37133 &num_imm64_operand);
37137 /* Return single or double path for instructions. */
37139 static enum insn_path
37140 get_insn_path (rtx insn)
37142 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
37144 if ((int)path == 0)
37145 return path_single;
37147 if ((int)path == 1)
37148 return path_double;
37153 /* Return insn dispatch group. */
37155 static enum dispatch_group
37156 get_insn_group (rtx insn)
37158 enum dispatch_group group = get_mem_group (insn);
37162 if (is_branch (insn))
37163 return disp_branch;
37168 if (has_immediate (insn))
37171 if (is_prefetch (insn))
37172 return disp_prefetch;
37174 return disp_no_group;
37177 /* Count number of GROUP restricted instructions in a dispatch
37178 window WINDOW_LIST. */
37181 count_num_restricted (rtx insn, dispatch_windows *window_list)
37183 enum dispatch_group group = get_insn_group (insn);
37185 int num_imm_operand;
37186 int num_imm32_operand;
37187 int num_imm64_operand;
37189 if (group == disp_no_group)
37192 if (group == disp_imm)
37194 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
37195 &num_imm64_operand);
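/* Reject the insn if it would push the window past MAX_IMM immediate
   operands or MAX_IMM_SIZE of immediate data; in the 32/64-bit
   bookkeeping below each 64-bit immediate also counts as two 32-bit
   immediates against the MAX_IMM_32 limit.  */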
37196 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
37197 || num_imm_operand + window_list->num_imm > MAX_IMM
37198 || (num_imm32_operand > 0
37199 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
37200 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
37201 || (num_imm64_operand > 0
37202 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
37203 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
37204 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
37205 && num_imm64_operand > 0
37206 && ((window_list->num_imm_64 > 0
37207 && window_list->num_insn >= 2)
37208 || window_list->num_insn >= 3)))
37214 if ((group == disp_load_store
37215 && (window_list->num_loads >= MAX_LOAD
37216 || window_list->num_stores >= MAX_STORE))
37217 || ((group == disp_load
37218 || group == disp_prefetch)
37219 && window_list->num_loads >= MAX_LOAD)
37220 || (group == disp_store
37221 && window_list->num_stores >= MAX_STORE))
37227 /* This function returns true if insn satisfies dispatch rules on the
37228 last window scheduled. */
37231 fits_dispatch_window (rtx insn)
37233 dispatch_windows *window_list = dispatch_window_list;
37234 dispatch_windows *window_list_next = dispatch_window_list->next;
37235 unsigned int num_restrict;
37236 enum dispatch_group group = get_insn_group (insn);
37237 enum insn_path path = get_insn_path (insn);
37240 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
37241 instructions should be given the lowest priority in the
37242 scheduling process in the Haifa scheduler to make sure they will be
37243 scheduled in the same dispatch window as the reference to them. */
37244 if (group == disp_jcc || group == disp_cmp)
37247 /* Check nonrestricted. */
37248 if (group == disp_no_group || group == disp_branch)
37251 /* Get last dispatch window. */
37252 if (window_list_next)
37253 window_list = window_list_next;
37255 if (window_list->window_num == 1)
37257 sum = window_list->prev->window_size + window_list->window_size;
37260 || (min_insn_size (insn) + sum) >= 48)
37261 /* Window 1 is full. Go for next window. */
37265 num_restrict = count_num_restricted (insn, window_list);
37267 if (num_restrict > num_allowable_groups[group])
37270 /* See if it fits in the first window. */
37271 if (window_list->window_num == 0)
37273 /* The first window should have only single- and double-path
37275 if (path == path_double
37276 && (window_list->num_uops + 2) > MAX_INSN)
37278 else if (path != path_single)
37284 /* Add an instruction INSN with NUM_UOPS micro-operations to the
37285 dispatch window WINDOW_LIST. */
37288 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
37290 int byte_len = min_insn_size (insn);
37291 int num_insn = window_list->num_insn;
37293 sched_insn_info *window = window_list->window;
37294 enum dispatch_group group = get_insn_group (insn);
37295 enum insn_path path = get_insn_path (insn);
37296 int num_imm_operand;
37297 int num_imm32_operand;
37298 int num_imm64_operand;
37300 if (!window_list->violation && group != disp_cmp
37301 && !fits_dispatch_window (insn))
37302 window_list->violation = true;
37304 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
37305 &num_imm64_operand);
37307 /* Initialize window with new instruction. */
37308 window[num_insn].insn = insn;
37309 window[num_insn].byte_len = byte_len;
37310 window[num_insn].group = group;
37311 window[num_insn].path = path;
37312 window[num_insn].imm_bytes = imm_size;
37314 window_list->window_size += byte_len;
37315 window_list->num_insn = num_insn + 1;
37316 window_list->num_uops = window_list->num_uops + num_uops;
37317 window_list->imm_size += imm_size;
37318 window_list->num_imm += num_imm_operand;
37319 window_list->num_imm_32 += num_imm32_operand;
37320 window_list->num_imm_64 += num_imm64_operand;
37322 if (group == disp_store)
37323 window_list->num_stores += 1;
37324 else if (group == disp_load
37325 || group == disp_prefetch)
37326 window_list->num_loads += 1;
37327 else if (group == disp_load_store)
37329 window_list->num_stores += 1;
37330 window_list->num_loads += 1;
37334 /* Adds a scheduled instruction, INSN, to the current dispatch window.
37335 If the total bytes of instructions or the number of instructions in
37336 the window exceeds what is allowed, it allocates a new window.
37339 add_to_dispatch_window (rtx insn)
37342 dispatch_windows *window_list;
37343 dispatch_windows *next_list;
37344 dispatch_windows *window0_list;
37345 enum insn_path path;
37346 enum dispatch_group insn_group;
37354 if (INSN_CODE (insn) < 0)
37357 byte_len = min_insn_size (insn);
37358 window_list = dispatch_window_list;
37359 next_list = window_list->next;
37360 path = get_insn_path (insn);
37361 insn_group = get_insn_group (insn);
37363 /* Get the last dispatch window. */
37365 window_list = dispatch_window_list->next;
37367 if (path == path_single)
37369 else if (path == path_double)
37372 insn_num_uops = (int) path;
37374 /* If the current window is full, get a new window.
37375 Window number zero is full if MAX_INSN uops are scheduled in it.
37376 Window number one is full if window zero's bytes plus window
37377 one's bytes equal 32, or if adding the bytes of the new instruction
37378 to the total makes it greater than 48, or if it already has MAX_INSN
37379 instructions in it. */
37380 num_insn = window_list->num_insn;
37381 num_uops = window_list->num_uops;
37382 window_num = window_list->window_num;
37383 insn_fits = fits_dispatch_window (insn);
37385 if (num_insn >= MAX_INSN
37386 || num_uops + insn_num_uops > MAX_INSN
37389 window_num = ~window_num & 1;
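/* ~WINDOW_NUM & 1 flips WINDOW_NUM between 0 and 1, so the other
   window is allocated next.  */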
37390 window_list = allocate_next_window (window_num);
37393 if (window_num == 0)
37395 add_insn_window (insn, window_list, insn_num_uops);
37396 if (window_list->num_insn >= MAX_INSN
37397 && insn_group == disp_branch)
37399 process_end_window ();
37403 else if (window_num == 1)
37405 window0_list = window_list->prev;
37406 sum = window0_list->window_size + window_list->window_size;
37408 || (byte_len + sum) >= 48)
37410 process_end_window ();
37411 window_list = dispatch_window_list;
37414 add_insn_window (insn, window_list, insn_num_uops);
37417 gcc_unreachable ();
37419 if (is_end_basic_block (insn_group))
37421 /* End of basic block is reached; do end-basic-block processing. */
37422 process_end_window ();
37427 /* Print the dispatch window, WINDOW_NUM, to FILE. */
37429 DEBUG_FUNCTION static void
37430 debug_dispatch_window_file (FILE *file, int window_num)
37432 dispatch_windows *list;
37435 if (window_num == 0)
37436 list = dispatch_window_list;
37438 list = dispatch_window_list1;
37440 fprintf (file, "Window #%d:\n", list->window_num);
37441 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
37442 list->num_insn, list->num_uops, list->window_size);
37443 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
37444 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
37446 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
37448 fprintf (file, " insn info:\n");
37450 for (i = 0; i < MAX_INSN; i++)
37452 if (!list->window[i].insn)
37454 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
37455 i, group_name[list->window[i].group],
37456 i, (void *)list->window[i].insn,
37457 i, list->window[i].path,
37458 i, list->window[i].byte_len,
37459 i, list->window[i].imm_bytes);
37463 /* Print to stdout a dispatch window. */
37465 DEBUG_FUNCTION void
37466 debug_dispatch_window (int window_num)
37468 debug_dispatch_window_file (stdout, window_num);
37471 /* Print INSN dispatch information to FILE. */
37473 DEBUG_FUNCTION static void
37474 debug_insn_dispatch_info_file (FILE *file, rtx insn)
37477 enum insn_path path;
37478 enum dispatch_group group;
37480 int num_imm_operand;
37481 int num_imm32_operand;
37482 int num_imm64_operand;
37484 if (INSN_CODE (insn) < 0)
37487 byte_len = min_insn_size (insn);
37488 path = get_insn_path (insn);
37489 group = get_insn_group (insn);
37490 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
37491 &num_imm64_operand);
37493 fprintf (file, " insn info:\n");
37494 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
37495 group_name[group], path, byte_len);
37496 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
37497 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
37500 /* Print to STDOUT the status of the ready list with respect to
37501 dispatch windows. */
37503 DEBUG_FUNCTION void
37504 debug_ready_dispatch (void)
37507 int no_ready = number_in_ready ();
37509 fprintf (stdout, "Number of ready: %d\n", no_ready);
37511 for (i = 0; i < no_ready; i++)
37512 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
37515 /* This routine is the driver of the dispatch scheduler. */
37518 do_dispatch (rtx insn, int mode)
37520 if (mode == DISPATCH_INIT)
37521 init_dispatch_sched ();
37522 else if (mode == ADD_TO_DISPATCH_WINDOW)
37523 add_to_dispatch_window (insn);
37526 /* Return TRUE if Dispatch Scheduling is supported. */
37529 has_dispatch (rtx insn, int action)
37531 if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
37532 && flag_dispatch_scheduler)
37538 case IS_DISPATCH_ON:
37543 return is_cmp (insn);
37545 case DISPATCH_VIOLATION:
37546 return dispatch_violation ();
37548 case FITS_DISPATCH_WINDOW:
37549 return fits_dispatch_window (insn);
37555 /* Implementation of reassociation_width target hook used by
37556 reassoc phase to identify parallelism level in reassociated
37557 tree. The statement's tree code is passed in OPC. The arguments' type
37560 Currently parallel reassociation is enabled for Atom
37561 processors only and we set reassociation width to be 2
37562 because Atom may issue up to 2 instructions per cycle.
37564 Return value should be fixed if parallel reassociation is
37565 enabled for other processors. */
37568 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
37569 enum machine_mode mode)
37573 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
37575 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
37581 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
37582 place emms and femms instructions. */
37584 static enum machine_mode
37585 ix86_preferred_simd_mode (enum machine_mode mode)
37593 return TARGET_AVX2 ? V32QImode : V16QImode;
37595 return TARGET_AVX2 ? V16HImode : V8HImode;
37597 return TARGET_AVX2 ? V8SImode : V4SImode;
37599 return TARGET_AVX2 ? V4DImode : V2DImode;
37602 if (TARGET_AVX && !TARGET_PREFER_AVX128)
37608 if (!TARGET_VECTORIZE_DOUBLE)
37610 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
37612 else if (TARGET_SSE2)
37621 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
37624 static unsigned int
37625 ix86_autovectorize_vector_sizes (void)
37627 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
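/* The value returned is a bitmask of candidate vector sizes in bytes:
   32 | 16 asks the vectorizer to try 256-bit and then 128-bit vectors,
   while 0 means only the preferred SIMD mode is used.  */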
37630 /* Initialize the GCC target structure. */
37631 #undef TARGET_RETURN_IN_MEMORY
37632 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
37634 #undef TARGET_LEGITIMIZE_ADDRESS
37635 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
37637 #undef TARGET_ATTRIBUTE_TABLE
37638 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
37639 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
37640 # undef TARGET_MERGE_DECL_ATTRIBUTES
37641 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
37644 #undef TARGET_COMP_TYPE_ATTRIBUTES
37645 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
37647 #undef TARGET_INIT_BUILTINS
37648 #define TARGET_INIT_BUILTINS ix86_init_builtins
37649 #undef TARGET_BUILTIN_DECL
37650 #define TARGET_BUILTIN_DECL ix86_builtin_decl
37651 #undef TARGET_EXPAND_BUILTIN
37652 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
37654 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
37655 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
37656 ix86_builtin_vectorized_function
37658 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
37659 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
37661 #undef TARGET_BUILTIN_RECIPROCAL
37662 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
37664 #undef TARGET_ASM_FUNCTION_EPILOGUE
37665 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
37667 #undef TARGET_ENCODE_SECTION_INFO
37668 #ifndef SUBTARGET_ENCODE_SECTION_INFO
37669 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
37671 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
37674 #undef TARGET_ASM_OPEN_PAREN
37675 #define TARGET_ASM_OPEN_PAREN ""
37676 #undef TARGET_ASM_CLOSE_PAREN
37677 #define TARGET_ASM_CLOSE_PAREN ""
37679 #undef TARGET_ASM_BYTE_OP
37680 #define TARGET_ASM_BYTE_OP ASM_BYTE
37682 #undef TARGET_ASM_ALIGNED_HI_OP
37683 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
37684 #undef TARGET_ASM_ALIGNED_SI_OP
37685 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
37687 #undef TARGET_ASM_ALIGNED_DI_OP
37688 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
37691 #undef TARGET_PROFILE_BEFORE_PROLOGUE
37692 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
37694 #undef TARGET_ASM_UNALIGNED_HI_OP
37695 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
37696 #undef TARGET_ASM_UNALIGNED_SI_OP
37697 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
37698 #undef TARGET_ASM_UNALIGNED_DI_OP
37699 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
37701 #undef TARGET_PRINT_OPERAND
37702 #define TARGET_PRINT_OPERAND ix86_print_operand
37703 #undef TARGET_PRINT_OPERAND_ADDRESS
37704 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
37705 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
37706 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
37707 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
37708 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
37710 #undef TARGET_SCHED_INIT_GLOBAL
37711 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
37712 #undef TARGET_SCHED_ADJUST_COST
37713 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
37714 #undef TARGET_SCHED_ISSUE_RATE
37715 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
37716 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
37717 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
37718 ia32_multipass_dfa_lookahead
37720 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
37721 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
37724 #undef TARGET_HAVE_TLS
37725 #define TARGET_HAVE_TLS true
37727 #undef TARGET_CANNOT_FORCE_CONST_MEM
37728 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
37729 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
37730 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
37732 #undef TARGET_DELEGITIMIZE_ADDRESS
37733 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
37735 #undef TARGET_MS_BITFIELD_LAYOUT_P
37736 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
37739 #undef TARGET_BINDS_LOCAL_P
37740 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
37742 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
37743 #undef TARGET_BINDS_LOCAL_P
37744 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
37747 #undef TARGET_ASM_OUTPUT_MI_THUNK
37748 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
37749 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
37750 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
37752 #undef TARGET_ASM_FILE_START
37753 #define TARGET_ASM_FILE_START x86_file_start
37755 #undef TARGET_OPTION_OVERRIDE
37756 #define TARGET_OPTION_OVERRIDE ix86_option_override
37758 #undef TARGET_REGISTER_MOVE_COST
37759 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
37760 #undef TARGET_MEMORY_MOVE_COST
37761 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
37762 #undef TARGET_RTX_COSTS
37763 #define TARGET_RTX_COSTS ix86_rtx_costs
37764 #undef TARGET_ADDRESS_COST
37765 #define TARGET_ADDRESS_COST ix86_address_cost
37767 #undef TARGET_FIXED_CONDITION_CODE_REGS
37768 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
37769 #undef TARGET_CC_MODES_COMPATIBLE
37770 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
37772 #undef TARGET_MACHINE_DEPENDENT_REORG
37773 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
37775 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
37776 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
37778 #undef TARGET_BUILD_BUILTIN_VA_LIST
37779 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
37781 #undef TARGET_ENUM_VA_LIST_P
37782 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
37784 #undef TARGET_FN_ABI_VA_LIST
37785 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
37787 #undef TARGET_CANONICAL_VA_LIST_TYPE
37788 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
37790 #undef TARGET_EXPAND_BUILTIN_VA_START
37791 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
37793 #undef TARGET_MD_ASM_CLOBBERS
37794 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
37796 #undef TARGET_PROMOTE_PROTOTYPES
37797 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
37798 #undef TARGET_STRUCT_VALUE_RTX
37799 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
37800 #undef TARGET_SETUP_INCOMING_VARARGS
37801 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
37802 #undef TARGET_MUST_PASS_IN_STACK
37803 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
37804 #undef TARGET_FUNCTION_ARG_ADVANCE
37805 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
37806 #undef TARGET_FUNCTION_ARG
37807 #define TARGET_FUNCTION_ARG ix86_function_arg
37808 #undef TARGET_FUNCTION_ARG_BOUNDARY
37809 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
37810 #undef TARGET_PASS_BY_REFERENCE
37811 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
37812 #undef TARGET_INTERNAL_ARG_POINTER
37813 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
37814 #undef TARGET_UPDATE_STACK_BOUNDARY
37815 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
37816 #undef TARGET_GET_DRAP_RTX
37817 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
37818 #undef TARGET_STRICT_ARGUMENT_NAMING
37819 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
37820 #undef TARGET_STATIC_CHAIN
37821 #define TARGET_STATIC_CHAIN ix86_static_chain
37822 #undef TARGET_TRAMPOLINE_INIT
37823 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
37824 #undef TARGET_RETURN_POPS_ARGS
37825 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
37827 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
37828 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
37830 #undef TARGET_SCALAR_MODE_SUPPORTED_P
37831 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
37833 #undef TARGET_VECTOR_MODE_SUPPORTED_P
37834 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
37836 #undef TARGET_C_MODE_FOR_SUFFIX
37837 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
37840 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
37841 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
37844 #ifdef SUBTARGET_INSERT_ATTRIBUTES
37845 #undef TARGET_INSERT_ATTRIBUTES
37846 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
37849 #undef TARGET_MANGLE_TYPE
37850 #define TARGET_MANGLE_TYPE ix86_mangle_type
37852 #ifndef TARGET_MACHO
37853 #undef TARGET_STACK_PROTECT_FAIL
37854 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
37857 #undef TARGET_FUNCTION_VALUE
37858 #define TARGET_FUNCTION_VALUE ix86_function_value
37860 #undef TARGET_FUNCTION_VALUE_REGNO_P
37861 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
37863 #undef TARGET_PROMOTE_FUNCTION_MODE
37864 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
37866 #undef TARGET_SECONDARY_RELOAD
37867 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
37869 #undef TARGET_CLASS_MAX_NREGS
37870 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
37872 #undef TARGET_PREFERRED_RELOAD_CLASS
37873 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
37874 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
37875 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
37876 #undef TARGET_CLASS_LIKELY_SPILLED_P
37877 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
37879 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
37880 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
37881 ix86_builtin_vectorization_cost
37882 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
37883 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
37884 ix86_vectorize_vec_perm_const_ok
37885 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
37886 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
37887 ix86_preferred_simd_mode
37888 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
37889 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
37890 ix86_autovectorize_vector_sizes
37892 #undef TARGET_SET_CURRENT_FUNCTION
37893 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
37895 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
37896 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
37898 #undef TARGET_OPTION_SAVE
37899 #define TARGET_OPTION_SAVE ix86_function_specific_save
37901 #undef TARGET_OPTION_RESTORE
37902 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
37904 #undef TARGET_OPTION_PRINT
37905 #define TARGET_OPTION_PRINT ix86_function_specific_print
37907 #undef TARGET_CAN_INLINE_P
37908 #define TARGET_CAN_INLINE_P ix86_can_inline_p
37910 #undef TARGET_EXPAND_TO_RTL_HOOK
37911 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
37913 #undef TARGET_LEGITIMATE_ADDRESS_P
37914 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
37916 #undef TARGET_LEGITIMATE_CONSTANT_P
37917 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
37919 #undef TARGET_FRAME_POINTER_REQUIRED
37920 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
37922 #undef TARGET_CAN_ELIMINATE
37923 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
37925 #undef TARGET_EXTRA_LIVE_ON_ENTRY
37926 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
37928 #undef TARGET_ASM_CODE_END
37929 #define TARGET_ASM_CODE_END ix86_code_end
37931 #undef TARGET_CONDITIONAL_REGISTER_USAGE
37932 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
37935 #undef TARGET_INIT_LIBFUNCS
37936 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
37939 struct gcc_target targetm = TARGET_INITIALIZER;
37941 #include "gt-i386.h"