/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
/* Possible states of the upper 128bits of the AVX registers.  */
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};
typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
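
/* Usage sketch for the accessor above (illustrative only, mirroring the
   pass below): BLOCK_INFO is meaningful only between the allocation and
   the release of the per-block aux storage, i.e.

     alloc_aux_for_blocks (sizeof (struct block_info_def));
     ...
     if (BLOCK_INFO (bb)->processed)
       ...
     free_aux_for_blocks ();

   move_or_delete_vzeroupper below follows exactly this pattern.  */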
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
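
/* A hedged illustration of how these values are consumed: the expander
   is assumed to record one of them as the first operand of the
   vzeroupper UNSPEC_VOLATILE, so the scanner below can recover the
   classification of the adjacent call with

     avx256 = INTVAL (XVECEXP (pat, 0, 0));
     if (avx256 == callee_return_avx256)
       ... the call's return value needs the upper 128bits ...

   as done in move_or_delete_vzeroupper_2.  */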
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
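
/* Usage sketch: the function above has the note_stores callback
   signature, so scanning one insn's stores looks like

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);

   leaving STATE == used iff some store referenced a 256bit AVX
   register; move_or_delete_vzeroupper_2 below uses exactly this
   call.  */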
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as USED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	  break;
	case unused:
	  break;
	case used:
	  state = used;
	  break;
	}
    }

  if (seen_unknown)
    state = unknown;

  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      edge_iterator ei;

	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
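
/* An illustrative summary of the driver above (no new functionality):
   blocks are keyed into the fibonacci heaps by BB_ORDER, i.e. reverse
   completion order, so each round roughly does

     for each bb taken from WORKLIST in reverse completion order:
       if move_or_delete_vzeroupper_1 changed bb's exit state:
	 push each unprocessed successor into WORKLIST (this round),
	 or into PENDING (next round) if it was already visited;

   the outer loop then swaps PENDING into WORKLIST and repeats until a
   round finishes with rescan_vzeroupper_p clear.  */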
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
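
/* Usage sketch (hedged): the rtx cost hooks are assumed to index the
   per-mode arrays of the processor_costs tables below with this macro,
   along the lines of

     cost->mult_init[MODE_INDEX (mode)]
     cost->divide[MODE_INDEX (mode)]

   where index 4 ("other") catches modes wider than DImode.  */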
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
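
/* Worked example of the unit trick above: COSTS_N_INSNS (1) == 4 and an
   add is 2 bytes, so COSTS_N_BYTES (2) == 4 makes a 2-byte instruction
   exactly as expensive as one add.  A 3-byte lea then costs
   COSTS_N_BYTES (3) == 6, i.e. 1.5 "adds", which lets the size table
   below reuse the speed-oriented cost fields unchanged.  */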
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
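
/* Reading the stringop descriptors below (inferred from how the tables
   are laid out here; the struct definition lives elsewhere): each entry
   is {unknown-size algorithm, {{max_size, algorithm}, ...}}, where a
   max_size of -1 terminates the list and covers all larger blocks.
   For example, from pentium_cost below,

     {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
      DUMMY_STRINGOP_ALGS},

   means blocks of at most 256 bytes use rep movsl and anything bigger
   (or of unknown size) goes to the library call; the DUMMY entry is
   the unused 64bit variant.  */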
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
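
/* Consumption sketch (hedged, the exact selection logic lives in the
   options code, not in this excerpt): the active table is assumed to be
   reached through the ix86_cost pointer, chosen at option-override
   time, roughly

     ix86_cost = optimize_size ? &ix86_size_cost
			       : processor_target_table[ix86_tune].cost;

   so everything below only describes relative costs per tuning.  */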
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/* HI */
   COSTS_N_INSNS (6),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (23),			/* SI */
   COSTS_N_INSNS (23),			/* DI */
   COSTS_N_INSNS (23)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/* HI */
   COSTS_N_INSNS (12),			/* SI */
   COSTS_N_INSNS (12),			/* DI */
   COSTS_N_INSNS (12)},			/* other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/* HI */
   COSTS_N_INSNS (40),			/* SI */
   COSTS_N_INSNS (40),			/* DI */
   COSTS_N_INSNS (40)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* HI */
   COSTS_N_INSNS (11),			/* SI */
   COSTS_N_INSNS (11),			/* DI */
   COSTS_N_INSNS (11)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* HI */
   COSTS_N_INSNS (25),			/* SI */
   COSTS_N_INSNS (25),			/* DI */
   COSTS_N_INSNS (25)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (4)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/* HI */
   COSTS_N_INSNS (17),			/* SI */
   COSTS_N_INSNS (17),			/* DI */
   COSTS_N_INSNS (17)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is
     the way to go.  Rep movsb has apparently more expensive startup time
     in the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (7),			/* SI */
   COSTS_N_INSNS (7),			/* DI */
   COSTS_N_INSNS (7)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (39),			/* SI */
   COSTS_N_INSNS (39),			/* DI */
   COSTS_N_INSNS (39)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/* HI */
   COSTS_N_INSNS (18),			/* SI */
   COSTS_N_INSNS (18),			/* DI */
   COSTS_N_INSNS (18)},			/* other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/* HI */
   COSTS_N_INSNS (5),			/* SI */
   COSTS_N_INSNS (5),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
					    MOVD reg32, xmmreg Double FADD 3 */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  11,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
					    MOVD reg32, xmmreg Double FADD 3 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  11,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
					    MOVD reg32, xmmreg Double FADD 3 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
					    MOVD reg32, xmmreg Double FADD 3 */
  32,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/* HI */
   COSTS_N_INSNS (15),			/* SI */
   COSTS_N_INSNS (15),			/* DI */
   COSTS_N_INSNS (15)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/* HI */
   COSTS_N_INSNS (56),			/* SI */
   COSTS_N_INSNS (56),			/* DI */
   COSTS_N_INSNS (56)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/* HI */
   COSTS_N_INSNS (10),			/* SI */
   COSTS_N_INSNS (10),			/* DI */
   COSTS_N_INSNS (10)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/* HI */
   COSTS_N_INSNS (66),			/* SI */
   COSTS_N_INSNS (66),			/* DI */
   COSTS_N_INSNS (66)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  1024,					/* size of l2 cache.  */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
1655 struct processor_costs atom_cost = {
1656 COSTS_N_INSNS (1), /* cost of an add instruction */
1657 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1658 COSTS_N_INSNS (1), /* variable shift costs */
1659 COSTS_N_INSNS (1), /* constant shift costs */
1660 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1661 COSTS_N_INSNS (4), /* HI */
1662 COSTS_N_INSNS (3), /* SI */
1663 COSTS_N_INSNS (4), /* DI */
1664 COSTS_N_INSNS (2)}, /* other */
1665 0, /* cost of multiply per each bit set */
1666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1667 COSTS_N_INSNS (26), /* HI */
1668 COSTS_N_INSNS (42), /* SI */
1669 COSTS_N_INSNS (74), /* DI */
1670 COSTS_N_INSNS (74)}, /* other */
1671 COSTS_N_INSNS (1), /* cost of movsx */
1672 COSTS_N_INSNS (1), /* cost of movzx */
1673 8, /* "large" insn */
1674 17, /* MOVE_RATIO */
1675 2, /* cost for loading QImode using movzbl */
1676 {4, 4, 4}, /* cost of loading integer registers
1677 in QImode, HImode and SImode.
1678 Relative to reg-reg move (2). */
1679 {4, 4, 4}, /* cost of storing integer registers */
1680 4, /* cost of reg,reg fld/fst */
1681 {12, 12, 12}, /* cost of loading fp registers
1682 in SFmode, DFmode and XFmode */
1683 {6, 6, 8}, /* cost of storing fp registers
1684 in SFmode, DFmode and XFmode */
1685 2, /* cost of moving MMX register */
1686 {8, 8}, /* cost of loading MMX registers
1687 in SImode and DImode */
1688 {8, 8}, /* cost of storing MMX registers
1689 in SImode and DImode */
1690 2, /* cost of moving SSE register */
1691 {8, 8, 8}, /* cost of loading SSE registers
1692 in SImode, DImode and TImode */
1693 {8, 8, 8}, /* cost of storing SSE registers
1694 in SImode, DImode and TImode */
1695 5, /* MMX or SSE register to integer */
1696 32, /* size of l1 cache. */
1697 256, /* size of l2 cache. */
1698 64, /* size of prefetch block */
1699 6, /* number of parallel prefetches */
1700 3, /* Branch cost */
1701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1702 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1703 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1704 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1705 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1706 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1707 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1708 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1709 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1710 {{libcall, {{8, loop}, {15, unrolled_loop},
1711 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1712 {libcall, {{24, loop}, {32, unrolled_loop},
1713 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1714 1, /* scalar_stmt_cost. */
1715 1, /* scalar_load_cost. */
1716 1, /* scalar_store_cost. */
1717 1, /* vec_stmt_cost. */
1718 1, /* vec_to_scalar_cost. */
1719 1, /* scalar_to_vec_cost. */
1720 1, /* vec_align_load_cost. */
1721 2, /* vec_unalign_load_cost. */
1722 1, /* vec_store_cost. */
1723 3, /* cond_taken_branch_cost. */
1724 1, /* cond_not_taken_branch_cost. */
1727 /* Generic64 should produce code tuned for Nocona and K8. */
1729 struct processor_costs generic64_cost = {
1730 COSTS_N_INSNS (1), /* cost of an add instruction */
1731 /* On all chips taken into consideration, lea is 2 cycles or more. With
1732 this cost, however, our current implementation of synth_mult results in
1733 the use of unnecessary temporary registers, causing regressions on several
1734 SPECfp benchmarks. */
1735 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1736 COSTS_N_INSNS (1), /* variable shift costs */
1737 COSTS_N_INSNS (1), /* constant shift costs */
1738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1739 COSTS_N_INSNS (4), /* HI */
1740 COSTS_N_INSNS (3), /* SI */
1741 COSTS_N_INSNS (4), /* DI */
1742 COSTS_N_INSNS (2)}, /* other */
1743 0, /* cost of multiply per each bit set */
1744 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1745 COSTS_N_INSNS (26), /* HI */
1746 COSTS_N_INSNS (42), /* SI */
1747 COSTS_N_INSNS (74), /* DI */
1748 COSTS_N_INSNS (74)}, /* other */
1749 COSTS_N_INSNS (1), /* cost of movsx */
1750 COSTS_N_INSNS (1), /* cost of movzx */
1751 8, /* "large" insn */
1752 17, /* MOVE_RATIO */
1753 4, /* cost for loading QImode using movzbl */
1754 {4, 4, 4}, /* cost of loading integer registers
1755 in QImode, HImode and SImode.
1756 Relative to reg-reg move (2). */
1757 {4, 4, 4}, /* cost of storing integer registers */
1758 4, /* cost of reg,reg fld/fst */
1759 {12, 12, 12}, /* cost of loading fp registers
1760 in SFmode, DFmode and XFmode */
1761 {6, 6, 8}, /* cost of storing fp registers
1762 in SFmode, DFmode and XFmode */
1763 2, /* cost of moving MMX register */
1764 {8, 8}, /* cost of loading MMX registers
1765 in SImode and DImode */
1766 {8, 8}, /* cost of storing MMX registers
1767 in SImode and DImode */
1768 2, /* cost of moving SSE register */
1769 {8, 8, 8}, /* cost of loading SSE registers
1770 in SImode, DImode and TImode */
1771 {8, 8, 8}, /* cost of storing SSE registers
1772 in SImode, DImode and TImode */
1773 5, /* MMX or SSE register to integer */
1774 32, /* size of l1 cache. */
1775 512, /* size of l2 cache. */
1776 64, /* size of prefetch block */
1777 6, /* number of parallel prefetches */
1778 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1779 value is increased to the perhaps more appropriate value of 5. */
1780 3, /* Branch cost */
1781 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1782 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1783 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1784 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1785 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1786 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1787 {DUMMY_STRINGOP_ALGS,
1788 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1789 {DUMMY_STRINGOP_ALGS,
1790 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1791 1, /* scalar_stmt_cost. */
1792 1, /* scalar_load_cost. */
1793 1, /* scalar_store_cost. */
1794 1, /* vec_stmt_cost. */
1795 1, /* vec_to_scalar_cost. */
1796 1, /* scalar_to_vec_cost. */
1797 1, /* vec_align_load_cost. */
1798 2, /* vec_unalign_load_cost. */
1799 1, /* vec_store_cost. */
1800 3, /* cond_taken_branch_cost. */
1801 1, /* cond_not_taken_branch_cost. */
1804 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1805 Athlon and K8. */
1807 struct processor_costs generic32_cost = {
1808 COSTS_N_INSNS (1), /* cost of an add instruction */
1809 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1810 COSTS_N_INSNS (1), /* variable shift costs */
1811 COSTS_N_INSNS (1), /* constant shift costs */
1812 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1813 COSTS_N_INSNS (4), /* HI */
1814 COSTS_N_INSNS (3), /* SI */
1815 COSTS_N_INSNS (4), /* DI */
1816 COSTS_N_INSNS (2)}, /* other */
1817 0, /* cost of multiply per each bit set */
1818 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1819 COSTS_N_INSNS (26), /* HI */
1820 COSTS_N_INSNS (42), /* SI */
1821 COSTS_N_INSNS (74), /* DI */
1822 COSTS_N_INSNS (74)}, /* other */
1823 COSTS_N_INSNS (1), /* cost of movsx */
1824 COSTS_N_INSNS (1), /* cost of movzx */
1825 8, /* "large" insn */
1826 17, /* MOVE_RATIO */
1827 4, /* cost for loading QImode using movzbl */
1828 {4, 4, 4}, /* cost of loading integer registers
1829 in QImode, HImode and SImode.
1830 Relative to reg-reg move (2). */
1831 {4, 4, 4}, /* cost of storing integer registers */
1832 4, /* cost of reg,reg fld/fst */
1833 {12, 12, 12}, /* cost of loading fp registers
1834 in SFmode, DFmode and XFmode */
1835 {6, 6, 8}, /* cost of storing fp registers
1836 in SFmode, DFmode and XFmode */
1837 2, /* cost of moving MMX register */
1838 {8, 8}, /* cost of loading MMX registers
1839 in SImode and DImode */
1840 {8, 8}, /* cost of storing MMX registers
1841 in SImode and DImode */
1842 2, /* cost of moving SSE register */
1843 {8, 8, 8}, /* cost of loading SSE registers
1844 in SImode, DImode and TImode */
1845 {8, 8, 8}, /* cost of storing SSE registers
1846 in SImode, DImode and TImode */
1847 5, /* MMX or SSE register to integer */
1848 32, /* size of l1 cache. */
1849 256, /* size of l2 cache. */
1850 64, /* size of prefetch block */
1851 6, /* number of parallel prefetches */
1852 3, /* Branch cost */
1853 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1854 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1855 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1856 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1857 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1858 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1859 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1860 DUMMY_STRINGOP_ALGS},
1861 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1862 DUMMY_STRINGOP_ALGS},
1863 1, /* scalar_stmt_cost. */
1864 1, /* scalar_load_cost. */
1865 1, /* scalar_store_cost. */
1866 1, /* vec_stmt_cost. */
1867 1, /* vec_to_scalar_cost. */
1868 1, /* scalar_to_vec_cost. */
1869 1, /* vec_align_load_cost. */
1870 2, /* vec_unalign_load_cost. */
1871 1, /* vec_store_cost. */
1872 3, /* cond_taken_branch_cost. */
1873 1, /* cond_not_taken_branch_cost. */
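/* Editorial sketch, not part of GCC: how to read the cost tables above.
   Assuming the usual rtl.h definition COSTS_N_INSNS (N) == (N) * 4,
   entries are expressed in quarters of a single fast insn, so only the
   ratios between entries matter.  ix86_cost below is repointed at the
   table matching -mtune= during option override.  */
#if 0
static int
example_cost_ratio (void)
{
  /* In nocona_cost a DImode divide, COSTS_N_INSNS (66), is 66 times as
     expensive as an add, COSTS_N_INSNS (1), in the RTL cost model.  */
  return COSTS_N_INSNS (66) / COSTS_N_INSNS (1);	/* == 66 */
}
#endif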
1876 const struct processor_costs *ix86_cost = &pentium_cost;
1878 /* Processor feature/optimization bitmasks. */
1879 #define m_386 (1<<PROCESSOR_I386)
1880 #define m_486 (1<<PROCESSOR_I486)
1881 #define m_PENT (1<<PROCESSOR_PENTIUM)
1882 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1883 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1884 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1885 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1886 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1887 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1888 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1889 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1890 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1891 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1892 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1893 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1894 #define m_ATOM (1<<PROCESSOR_ATOM)
1896 #define m_GEODE (1<<PROCESSOR_GEODE)
1897 #define m_K6 (1<<PROCESSOR_K6)
1898 #define m_K6_GEODE (m_K6 | m_GEODE)
1899 #define m_K8 (1<<PROCESSOR_K8)
1900 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1903 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1904 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1905 #define m_BDVER (m_BDVER1 | m_BDVER2)
1906 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1907 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1909 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1910 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1912 /* Generic instruction choice should be the common subset of supported CPUs
1913 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1914 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
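/* Editorial sketch, not part of GCC: each initial_ix86_tune_features
   entry below is a bitmask over the m_* processor bits above.  A feature
   is enabled for the selected CPU iff that CPU's bit is present in the
   mask, exactly as the initialization loop in
   ix86_option_override_internal later in this file computes.  */
#if 0
static unsigned char
example_tune_feature_enabled (unsigned int feature_mask,
			      enum processor_type tune)
{
  unsigned int tune_mask = 1u << tune;	/* e.g. m_K8 for PROCESSOR_K8.  */
  return !!(feature_mask & tune_mask);
}
#endif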
1916 /* Feature tests against the various tunings. */
1917 unsigned char ix86_tune_features[X86_TUNE_LAST];
1919 /* Feature tests against the various tunings used to create ix86_tune_features
1920 based on the processor mask. */
1921 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1922 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1923 negatively, so enabling it for Generic64 seems like a good code-size
1924 tradeoff. We can't enable it for 32bit generic because it does not
1925 work well with PPro-based chips. */
1926 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1928 /* X86_TUNE_PUSH_MEMORY */
1929 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1931 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1934 /* X86_TUNE_UNROLL_STRLEN */
1935 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1937 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were added to the P4
1938 based on simulation results. But after the P4 shipped, no performance
1939 benefit was observed from branch hints, and they also increase code size.
1940 As a result, icc never generates branch hints. */
1943 /* X86_TUNE_DOUBLE_WITH_ADD */
1946 /* X86_TUNE_USE_SAHF */
1947 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1949 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1950 partial dependencies. */
1951 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1953 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1954 register stalls for the Generic32 compilation setting as well. However,
1955 in the current implementation partial register stalls are not eliminated
1956 very well - they can be introduced via subregs synthesized by combine
1957 and can happen in caller/callee-saving sequences. Because this option
1958 pays back little on PPro-based chips and conflicts with the partial-reg
1959 dependencies used by Athlon/P4-based chips, it is better to leave it off
1960 for generic32 for now. */
1963 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1964 m_CORE2I7 | m_GENERIC,
1966 /* X86_TUNE_USE_HIMODE_FIOP */
1967 m_386 | m_486 | m_K6_GEODE,
1969 /* X86_TUNE_USE_SIMODE_FIOP */
1970 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1972 /* X86_TUNE_USE_MOV0 */
1975 /* X86_TUNE_USE_CLTD */
1976 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1978 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1981 /* X86_TUNE_SPLIT_LONG_MOVES */
1984 /* X86_TUNE_READ_MODIFY_WRITE */
1987 /* X86_TUNE_READ_MODIFY */
1990 /* X86_TUNE_PROMOTE_QIMODE */
1991 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1993 /* X86_TUNE_FAST_PREFIX */
1994 ~(m_386 | m_486 | m_PENT),
1996 /* X86_TUNE_SINGLE_STRINGOP */
1997 m_386 | m_P4_NOCONA,
1999 /* X86_TUNE_QIMODE_MATH */
2002 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2003 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
2004 might be considered for Generic32 if our scheme for avoiding partial
2005 stalls were more effective. */
2008 /* X86_TUNE_PROMOTE_QI_REGS */
2011 /* X86_TUNE_PROMOTE_HI_REGS */
2014 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2015 over esp addition. */
2016 m_386 | m_486 | m_PENT | m_PPRO,
2018 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2019 over esp addition. */
2022 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2023 over esp subtraction. */
2024 m_386 | m_486 | m_PENT | m_K6_GEODE,
2026 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
2027 over esp subtraction. */
2028 m_PENT | m_K6_GEODE,
2030 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2031 for DFmode copies */
2032 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2034 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2035 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2037 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2038 conflict here between PPro/Pentium4-based chips that treat 128bit
2039 SSE registers as single units and K8-based chips that divide SSE
2040 registers into two 64bit halves. This knob promotes all store destinations
2041 to 128bit to allow register renaming on 128bit SSE units, but usually
2042 results in one extra micro-op on 64bit SSE units. Experimental results
2043 show that disabling this option on P4 brings an over 20% SPECfp regression,
2044 while enabling it on K8 brings roughly a 2.4% regression that can be partly
2045 masked by careful scheduling of moves. */
2046 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2048 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2049 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2051 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2054 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2057 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and dependencies
2058 are resolved on SSE register parts instead of whole registers, so we may
2059 maintain just the lower part of scalar values in the proper format, leaving
2060 the upper part undefined. */
2063 /* X86_TUNE_SSE_TYPELESS_STORES */
2066 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2067 m_PPRO | m_P4_NOCONA,
2069 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2070 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2072 /* X86_TUNE_PROLOGUE_USING_MOVE */
2073 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2075 /* X86_TUNE_EPILOGUE_USING_MOVE */
2076 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2078 /* X86_TUNE_SHIFT1 */
2081 /* X86_TUNE_USE_FFREEP */
2084 /* X86_TUNE_INTER_UNIT_MOVES */
2085 ~(m_AMD_MULTIPLE | m_GENERIC),
2087 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2088 ~(m_AMDFAM10 | m_BDVER),
2090 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2091 than 4 branch instructions in the 16 byte window. */
2092 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2094 /* X86_TUNE_SCHEDULE */
2095 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2097 /* X86_TUNE_USE_BT */
2098 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2100 /* X86_TUNE_USE_INCDEC */
2101 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2103 /* X86_TUNE_PAD_RETURNS */
2104 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2106 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2109 /* X86_TUNE_EXT_80387_CONSTANTS */
2110 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2112 /* X86_TUNE_SHORTEN_X87_SSE */
2115 /* X86_TUNE_AVOID_VECTOR_DECODE */
2116 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2118 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
2119 HImode and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
2122 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is
2123 a vector path on AMD machines. */
2124 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2126 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector path on AMD
2127 machines. */
2128 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2130 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2131 than a MOV. */
2134 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2135 but one byte longer. */
2138 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
2139 operand that cannot be represented using a modRM byte. The XOR
2140 replacement is long decoded, so this split helps here as well. */
2143 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2144 from FP to FP. */
2145 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2147 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2148 from integer to FP. */
2151 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2152 with a subsequent conditional jump instruction into a single
2153 compare-and-branch uop. */
2156 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2157 will impact LEA instruction selection. */
2160 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2161 instructions. */
2164 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2165 at -O3. For the moment, the prefetching seems badly tuned for Intel
2166 chips. */
2167 m_K6_GEODE | m_AMD_MULTIPLE,
2169 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2170 the auto-vectorizer. */
2173 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2174 during reassociation of integer computation. */
2177 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2178 during reassociation of fp computation. */
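/* Editorial sketch, not part of GCC: i386.h is assumed to wrap each
   ix86_tune_features entry in a TARGET_* convenience macro (e.g.
   TARGET_USE_LEAVE), so backend code tests a tuning knob with a plain
   array lookup.  */
#if 0
  if (ix86_tune_features[X86_TUNE_USE_LEAVE])
    ;	/* Emit "leave" instead of restoring %esp/%ebp by hand.  */
#endif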
2182 /* Feature tests against the various architecture variations. */
2183 unsigned char ix86_arch_features[X86_ARCH_LAST];
2185 /* Feature tests against the various architecture variations, used to create
2186 ix86_arch_features based on the processor mask. */
2187 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2188 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2189 ~(m_386 | m_486 | m_PENT | m_K6),
2191 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2194 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2197 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2200 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2204 static const unsigned int x86_accumulate_outgoing_args
2205 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2207 static const unsigned int x86_arch_always_fancy_math_387
2208 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2210 static const unsigned int x86_avx256_split_unaligned_load
2211 = m_COREI7 | m_GENERIC;
2213 static const unsigned int x86_avx256_split_unaligned_store
2214 = m_COREI7 | m_BDVER | m_GENERIC;
2216 /* In case the average insn count for a single function invocation is
2217 lower than this constant, emit fast (but longer) prologue and
2218 epilogue code. */
2219 #define FAST_PROLOGUE_INSN_COUNT 20
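/* Editorial sketch of the assumed shape of this heuristic (the exact
   logic lives in the frame layout code): short functions get the fast
   prologue.  */
#if 0
static bool
example_want_fast_prologue (int average_insn_count)
{
  return average_insn_count < FAST_PROLOGUE_INSN_COUNT;
}
#endif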
2221 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2222 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2223 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2224 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2226 /* Array of the smallest class containing reg number REGNO, indexed by
2227 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2229 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2231 /* ax, dx, cx, bx */
2232 AREG, DREG, CREG, BREG,
2233 /* si, di, bp, sp */
2234 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2236 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2237 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2240 /* flags, fpsr, fpcr, frame */
2241 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2243 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2246 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2249 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2250 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2251 /* SSE REX registers */
2252 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2256 /* The "default" register map used in 32bit mode. */
2258 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2260 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2261 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2262 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2263 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2264 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2269 /* The "default" register map used in 64bit mode. */
2271 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2273 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2274 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2275 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2276 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2277 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2278 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2279 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2282 /* Define the register numbers to be used in DWARF debugging information.
2283 The SVR4 reference port C compiler uses the following register numbers
2284 in its Dwarf output code:
2285 0 for %eax (gcc regno = 0)
2286 1 for %ecx (gcc regno = 2)
2287 2 for %edx (gcc regno = 1)
2288 3 for %ebx (gcc regno = 3)
2289 4 for %esp (gcc regno = 7)
2290 5 for %ebp (gcc regno = 6)
2291 6 for %esi (gcc regno = 4)
2292 7 for %edi (gcc regno = 5)
2293 The following three DWARF register numbers are never generated by
2294 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2295 believes these numbers have these meanings.
2296 8 for %eip (no gcc equivalent)
2297 9 for %eflags (gcc regno = 17)
2298 10 for %trapno (no gcc equivalent)
2299 It is not at all clear how we should number the FP stack registers
2300 for the x86 architecture. If the version of SDB on x86/svr4 were
2301 a bit less brain dead with respect to floating-point then we would
2302 have a precedent to follow with respect to DWARF register numbers
2303 for x86 FP registers, but the SDB on x86/svr4 is so completely
2304 broken with respect to FP registers that it is hardly worth thinking
2305 of it as something to strive for compatibility with.
2306 The version of x86/svr4 SDB I have at the moment does (partially)
2307 seem to believe that DWARF register number 11 is associated with
2308 the x86 register %st(0), but that's about all. Higher DWARF
2309 register numbers don't seem to be associated with anything in
2310 particular, and even for DWARF regno 11, SDB only seems to under-
2311 stand that it should say that a variable lives in %st(0) (when
2312 asked via an `=' command) if we said it was in DWARF regno 11,
2313 but SDB still prints garbage when asked for the value of the
2314 variable in question (via a `/' command).
2315 (Also note that the labels SDB prints for various FP stack regs
2316 when doing an `x' command are all wrong.)
2317 Note that these problems generally don't affect the native SVR4
2318 C compiler because it doesn't allow the use of -O with -g and
2319 because when it is *not* optimizing, it allocates a memory
2320 location for each floating-point variable, and the memory
2321 location is what gets described in the DWARF AT_location
2322 attribute for the variable in question.
2323 Regardless of the severe mental illness of the x86/svr4 SDB, we
2324 do something sensible here and we use the following DWARF
2325 register numbers. Note that these are all stack-top-relative
2326 numbers:
2327 11 for %st(0) (gcc regno = 8)
2328 12 for %st(1) (gcc regno = 9)
2329 13 for %st(2) (gcc regno = 10)
2330 14 for %st(3) (gcc regno = 11)
2331 15 for %st(4) (gcc regno = 12)
2332 16 for %st(5) (gcc regno = 13)
2333 17 for %st(6) (gcc regno = 14)
2334 18 for %st(7) (gcc regno = 15)
2336 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2338 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2339 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2340 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2341 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2342 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2343 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2344 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
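/* Editorial worked example: GCC regno 2 is %ecx, and
   svr4_dbx_register_map[2] == 1, matching "1 for %ecx (gcc regno = 2)"
   in the numbering documented above.  */
#if 0
static int
example_svr4_dwarf_regno_for_ecx (void)
{
  return svr4_dbx_register_map[2];	/* == 1 */
}
#endif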
2347 /* Define parameter passing and return registers. */
2349 static int const x86_64_int_parameter_registers[6] =
2351 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2354 static int const x86_64_ms_abi_int_parameter_registers[4] =
2356 CX_REG, DX_REG, R8_REG, R9_REG
2359 static int const x86_64_int_return_registers[4] =
2361 AX_REG, DX_REG, DI_REG, SI_REG
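/* Editorial worked example: for a call f (a, b, c), the SysV ABI passes
   the first three integer arguments in %rdi, %rsi and %rdx, while the
   MS ABI uses %rcx, %rdx and %r8, i.e. the leading entries of the two
   parameter tables above.  */
#if 0
static int
example_first_int_arg_regno (bool ms_abi)
{
  return ms_abi ? x86_64_ms_abi_int_parameter_registers[0]	/* %rcx */
		: x86_64_int_parameter_registers[0];		/* %rdi */
}
#endif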
2364 /* Define the structure for the machine field in struct function. */
2366 struct GTY(()) stack_local_entry {
2367 unsigned short mode;
2370 struct stack_local_entry *next;
2373 /* Structure describing stack frame layout (diagram abridged).
2374 Stack grows downward:
2380 saved static chain if ix86_static_chain_on_stack
2382 saved frame pointer if frame_pointer_needed
2383 <- HARD_FRAME_POINTER
2389 <- sse_regs_save_offset
2392 [va_arg registers] |
2396 [padding2] | = to_allocate */
2405 int outgoing_arguments_size;
2406 HOST_WIDE_INT frame;
2408 /* The offsets relative to ARG_POINTER. */
2409 HOST_WIDE_INT frame_pointer_offset;
2410 HOST_WIDE_INT hard_frame_pointer_offset;
2411 HOST_WIDE_INT stack_pointer_offset;
2412 HOST_WIDE_INT hfp_save_offset;
2413 HOST_WIDE_INT reg_save_offset;
2414 HOST_WIDE_INT sse_reg_save_offset;
2416 /* When save_regs_using_mov is set, emit prologue using
2417 move instead of push instructions. */
2418 bool save_regs_using_mov;
2421 /* Which cpu are we scheduling for. */
2422 enum attr_cpu ix86_schedule;
2424 /* Which cpu are we optimizing for. */
2425 enum processor_type ix86_tune;
2427 /* Which instruction set architecture to use. */
2428 enum processor_type ix86_arch;
2430 /* true if the SSE prefetch instruction is not a NOP. */
2431 int x86_prefetch_sse;
2433 /* -mstackrealign option */
2434 static const char ix86_force_align_arg_pointer_string[]
2435 = "force_align_arg_pointer";
2437 static rtx (*ix86_gen_leave) (void);
2438 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2439 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2440 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2441 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2442 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2443 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2444 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2445 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2446 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2448 /* Preferred alignment for stack boundary in bits. */
2449 unsigned int ix86_preferred_stack_boundary;
2451 /* Alignment for incoming stack boundary in bits specified at
2452 the command line. */
2453 static unsigned int ix86_user_incoming_stack_boundary;
2455 /* Default alignment for incoming stack boundary in bits. */
2456 static unsigned int ix86_default_incoming_stack_boundary;
2458 /* Alignment for incoming stack boundary in bits. */
2459 unsigned int ix86_incoming_stack_boundary;
2461 /* Calling-ABI-specific va_list type nodes. */
2462 static GTY(()) tree sysv_va_list_type_node;
2463 static GTY(()) tree ms_va_list_type_node;
2465 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2466 char internal_label_prefix[16];
2467 int internal_label_prefix_len;
2469 /* Fence to use after loop using movnt. */
2472 /* Register class used for passing a given 64bit part of the argument.
2473 These represent classes as documented by the psABI, with the exception
2474 of the SSESF and SSEDF classes, which are basically SSE class: gcc will
2475 use SF or DFmode moves instead of DImode to avoid reformatting penalties.
2477 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2478 whenever possible (the upper half does contain padding). */
2479 enum x86_64_reg_class
2482 X86_64_INTEGER_CLASS,
2483 X86_64_INTEGERSI_CLASS,
2490 X86_64_COMPLEX_X87_CLASS,
2494 #define MAX_CLASSES 4
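/* Editorial worked example (the real classification logic,
   classify_argument, appears later in this file): a lone int occupies
   only the low half of its eightbyte, so it is given
   X86_64_INTEGERSI_CLASS and moved in SImode, whereas a full 64-bit
   field would be X86_64_INTEGER_CLASS.  */
#if 0
static enum x86_64_reg_class
example_classify_plain_int (void)
{
  return X86_64_INTEGERSI_CLASS;
}
#endif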
2496 /* Table of constants used by fldpi, fldln2, etc. */
2497 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2498 static bool ext_80387_constants_init = 0;
2501 static struct machine_function * ix86_init_machine_status (void);
2502 static rtx ix86_function_value (const_tree, const_tree, bool);
2503 static bool ix86_function_value_regno_p (const unsigned int);
2504 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2506 static rtx ix86_static_chain (const_tree, bool);
2507 static int ix86_function_regparm (const_tree, const_tree);
2508 static void ix86_compute_frame_layout (struct ix86_frame *);
2509 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2511 static void ix86_add_new_builtins (HOST_WIDE_INT);
2512 static rtx ix86_expand_vec_perm_builtin (tree);
2513 static tree ix86_canonical_va_list_type (tree);
2514 static void predict_jump (int);
2515 static unsigned int split_stack_prologue_scratch_regno (void);
2516 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2518 enum ix86_function_specific_strings
2520 IX86_FUNCTION_SPECIFIC_ARCH,
2521 IX86_FUNCTION_SPECIFIC_TUNE,
2522 IX86_FUNCTION_SPECIFIC_MAX
2525 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2526 const char *, enum fpmath_unit, bool);
2527 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2528 static void ix86_function_specific_save (struct cl_target_option *);
2529 static void ix86_function_specific_restore (struct cl_target_option *);
2530 static void ix86_function_specific_print (FILE *, int,
2531 struct cl_target_option *);
2532 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2533 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2534 struct gcc_options *);
2535 static bool ix86_can_inline_p (tree, tree);
2536 static void ix86_set_current_function (tree);
2537 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2539 static enum calling_abi ix86_function_abi (const_tree);
2542 #ifndef SUBTARGET32_DEFAULT_CPU
2543 #define SUBTARGET32_DEFAULT_CPU "i386"
2546 /* The svr4 ABI for the i386 says that records and unions are returned
2547 in memory. */
2548 #ifndef DEFAULT_PCC_STRUCT_RETURN
2549 #define DEFAULT_PCC_STRUCT_RETURN 1
2552 /* Whether -mtune= or -march= were specified */
2553 static int ix86_tune_defaulted;
2554 static int ix86_arch_specified;
2556 /* Vectorization library interface and handlers. */
2557 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2559 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2560 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2562 /* Processor target table, indexed by processor number */
2565 const struct processor_costs *cost; /* Processor costs */
2566 const int align_loop; /* Default alignments. */
2567 const int align_loop_max_skip;
2568 const int align_jump;
2569 const int align_jump_max_skip;
2570 const int align_func;
2573 static const struct ptt processor_target_table[PROCESSOR_max] =
2575 {&i386_cost, 4, 3, 4, 3, 4},
2576 {&i486_cost, 16, 15, 16, 15, 16},
2577 {&pentium_cost, 16, 7, 16, 7, 16},
2578 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2579 {&geode_cost, 0, 0, 0, 0, 0},
2580 {&k6_cost, 32, 7, 32, 7, 32},
2581 {&athlon_cost, 16, 7, 16, 7, 16},
2582 {&pentium4_cost, 0, 0, 0, 0, 0},
2583 {&k8_cost, 16, 7, 16, 7, 16},
2584 {&nocona_cost, 0, 0, 0, 0, 0},
2585 /* Core 2 32-bit. */
2586 {&generic32_cost, 16, 10, 16, 10, 16},
2587 /* Core 2 64-bit. */
2588 {&generic64_cost, 16, 10, 16, 10, 16},
2589 /* Core i7 32-bit. */
2590 {&generic32_cost, 16, 10, 16, 10, 16},
2591 /* Core i7 64-bit. */
2592 {&generic64_cost, 16, 10, 16, 10, 16},
2593 {&generic32_cost, 16, 7, 16, 7, 16},
2594 {&generic64_cost, 16, 10, 16, 10, 16},
2595 {&amdfam10_cost, 32, 24, 32, 7, 32},
2596 {&bdver1_cost, 32, 24, 32, 7, 32},
2597 {&bdver2_cost, 32, 24, 32, 7, 32},
2598 {&btver1_cost, 32, 24, 32, 7, 32},
2599 {&atom_cost, 16, 7, 16, 7, 16}
2602 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2632 /* Return true if a red-zone is in use. */
2635 ix86_using_red_zone (void)
2637 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2640 /* Return a string that documents the current -m options. The caller is
2641 responsible for freeing the string. */
2644 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2645 const char *tune, enum fpmath_unit fpmath,
2648 struct ix86_target_opts
2650 const char *option; /* option string */
2651 HOST_WIDE_INT mask; /* isa mask options */
2654 /* This table is ordered so that options like -msse4.2 that imply
2655 preceding options are matched first. */
2656 static struct ix86_target_opts isa_opts[] =
2658 { "-m64", OPTION_MASK_ISA_64BIT },
2659 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2660 { "-mfma", OPTION_MASK_ISA_FMA },
2661 { "-mxop", OPTION_MASK_ISA_XOP },
2662 { "-mlwp", OPTION_MASK_ISA_LWP },
2663 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2664 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2665 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2666 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2667 { "-msse3", OPTION_MASK_ISA_SSE3 },
2668 { "-msse2", OPTION_MASK_ISA_SSE2 },
2669 { "-msse", OPTION_MASK_ISA_SSE },
2670 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2671 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2672 { "-mmmx", OPTION_MASK_ISA_MMX },
2673 { "-mabm", OPTION_MASK_ISA_ABM },
2674 { "-mbmi", OPTION_MASK_ISA_BMI },
2675 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2676 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2677 { "-mtbm", OPTION_MASK_ISA_TBM },
2678 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2679 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2680 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2681 { "-maes", OPTION_MASK_ISA_AES },
2682 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2683 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2684 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2685 { "-mf16c", OPTION_MASK_ISA_F16C },
2689 static struct ix86_target_opts flag_opts[] =
2691 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2692 { "-m80387", MASK_80387 },
2693 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2694 { "-malign-double", MASK_ALIGN_DOUBLE },
2695 { "-mcld", MASK_CLD },
2696 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2697 { "-mieee-fp", MASK_IEEE_FP },
2698 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2699 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2700 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2701 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2702 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2703 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2704 { "-mno-red-zone", MASK_NO_RED_ZONE },
2705 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2706 { "-mrecip", MASK_RECIP },
2707 { "-mrtd", MASK_RTD },
2708 { "-msseregparm", MASK_SSEREGPARM },
2709 { "-mstack-arg-probe", MASK_STACK_PROBE },
2710 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2711 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2712 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2713 { "-mvzeroupper", MASK_VZEROUPPER },
2714 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2715 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2716 { "-mprefer-avx128", MASK_PREFER_AVX128},
2719 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2722 char target_other[40];
2731 memset (opts, '\0', sizeof (opts));
2733 /* Add -march= option. */
2736 opts[num][0] = "-march=";
2737 opts[num++][1] = arch;
2740 /* Add -mtune= option. */
2743 opts[num][0] = "-mtune=";
2744 opts[num++][1] = tune;
2747 /* Pick out the options in isa options. */
2748 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2750 if ((isa & isa_opts[i].mask) != 0)
2752 opts[num++][0] = isa_opts[i].option;
2753 isa &= ~ isa_opts[i].mask;
2757 if (isa && add_nl_p)
2759 opts[num++][0] = isa_other;
2760 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2761 isa);
2764 /* Add flag options. */
2765 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2767 if ((flags & flag_opts[i].mask) != 0)
2769 opts[num++][0] = flag_opts[i].option;
2770 flags &= ~ flag_opts[i].mask;
2774 if (flags && add_nl_p)
2776 opts[num++][0] = target_other;
2777 sprintf (target_other, "(other flags: %#x)", flags);
2780 /* Add -fpmath= option. */
2783 opts[num][0] = "-mfpmath=";
2784 switch ((int) fpmath)
2787 opts[num++][1] = "387";
2791 opts[num++][1] = "sse";
2794 case FPMATH_387 | FPMATH_SSE:
2795 opts[num++][1] = "sse+387";
2807 gcc_assert (num < ARRAY_SIZE (opts));
2809 /* Size the string. */
2811 sep_len = (add_nl_p) ? 3 : 1;
2812 for (i = 0; i < num; i++)
2815 for (j = 0; j < 2; j++)
2817 len += strlen (opts[i][j]);
2820 /* Build the string. */
2821 ret = ptr = (char *) xmalloc (len);
2824 for (i = 0; i < num; i++)
2828 for (j = 0; j < 2; j++)
2829 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2836 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2844 for (j = 0; j < 2; j++)
2847 memcpy (ptr, opts[i][j], len2[j]);
2849 line_len += len2[j];
2854 gcc_assert (ret + len >= ptr);
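/* Editorial usage sketch for ix86_target_string, with hypothetical
   inputs; the result resembles "-m64 -msse2 -march=k8 -mtune=generic
   -mfpmath=sse", and the caller owns and must free the buffer.  */
#if 0
  char *s = ix86_target_string (OPTION_MASK_ISA_64BIT
				| OPTION_MASK_ISA_SSE2,
				target_flags, "k8", "generic",
				FPMATH_SSE, true);
  fprintf (stderr, "%s\n", s);
  free (s);
#endif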
2859 /* Return true if profiling code should be emitted before the
2860 prologue, and false otherwise.
2861 Note: for x86 with "hotfix", this is sorried (rejected with sorry ()). */
2863 ix86_profile_before_prologue (void)
2865 return flag_fentry != 0;
2868 /* Function that is callable from the debugger to print the current
2869 options. */
2871 ix86_debug_options (void)
2873 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2874 ix86_arch_string, ix86_tune_string,
2879 fprintf (stderr, "%s\n\n", opts);
2883 fputs ("<no options>\n\n", stderr);
2888 /* Override various settings based on options. If MAIN_ARGS_P, the
2889 options are from the command line, otherwise they are from
2893 ix86_option_override_internal (bool main_args_p)
2896 unsigned int ix86_arch_mask, ix86_tune_mask;
2897 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2902 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2903 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2904 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2905 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2906 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2907 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2908 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2909 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2910 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2911 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2912 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2913 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2914 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2915 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2916 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2917 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2918 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2919 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2920 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2921 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2922 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2923 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2924 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2925 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2926 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2927 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2928 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2929 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2930 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2931 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2932 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2933 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2934 /* If this reaches 64, we need to widen the struct pta flags field below. */
2938 const char *const name; /* processor name or nickname. */
2939 const enum processor_type processor;
2940 const enum attr_cpu schedule;
2941 const unsigned HOST_WIDE_INT flags;
2943 const processor_alias_table[] =
2945 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2946 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2947 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2948 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2949 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2950 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2951 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2952 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2953 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2954 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2955 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2956 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2957 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2959 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2961 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2962 PTA_MMX | PTA_SSE | PTA_SSE2},
2963 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2964 PTA_MMX |PTA_SSE | PTA_SSE2},
2965 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2966 PTA_MMX | PTA_SSE | PTA_SSE2},
2967 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2968 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2969 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2970 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2971 | PTA_CX16 | PTA_NO_SAHF},
2972 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2973 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2974 | PTA_SSSE3 | PTA_CX16},
2975 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2976 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2977 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2978 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2979 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2980 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2981 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2982 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
2983 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2984 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2985 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2986 | PTA_RDRND | PTA_F16C},
2987 {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
2988 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2989 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
2990 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2991 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
2992 | PTA_FMA | PTA_MOVBE},
2993 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2994 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2995 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2996 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2997 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2998 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2999 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3000 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3001 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3002 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3003 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3004 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3005 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3006 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3007 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3008 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3009 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3010 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3011 {"x86-64", PROCESSOR_K8, CPU_K8,
3012 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3013 {"k8", PROCESSOR_K8, CPU_K8,
3014 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3015 | PTA_SSE2 | PTA_NO_SAHF},
3016 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3017 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3018 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3019 {"opteron", PROCESSOR_K8, CPU_K8,
3020 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3021 | PTA_SSE2 | PTA_NO_SAHF},
3022 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3023 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3024 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3025 {"athlon64", PROCESSOR_K8, CPU_K8,
3026 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3027 | PTA_SSE2 | PTA_NO_SAHF},
3028 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3029 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3030 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3031 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3032 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3033 | PTA_SSE2 | PTA_NO_SAHF},
3034 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3035 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3036 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3037 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3038 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3039 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3040 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3041 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3042 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3043 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3044 | PTA_XOP | PTA_LWP},
3045 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3046 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3047 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3048 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3049 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3051 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3052 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3053 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
3054 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3055 0 /* flags are only used for -march switch. */ },
3056 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3057 PTA_64BIT /* flags are only used for -march switch. */ },
3060 /* -mrecip options. */
3063 const char *string; /* option name */
3064 unsigned int mask; /* mask bits to set */
3066 const recip_options[] =
3068 { "all", RECIP_MASK_ALL },
3069 { "none", RECIP_MASK_NONE },
3070 { "div", RECIP_MASK_DIV },
3071 { "sqrt", RECIP_MASK_SQRT },
3072 { "vec-div", RECIP_MASK_VEC_DIV },
3073 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3076 int const pta_size = ARRAY_SIZE (processor_alias_table);
3078 /* Set up prefix/suffix so the error messages refer to either the
3079 command-line argument or the attribute(target). */
3088 prefix = "option(\"";
3093 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3094 SUBTARGET_OVERRIDE_OPTIONS;
3097 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3098 SUBSUBTARGET_OVERRIDE_OPTIONS;
3102 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3104 /* -fPIC is the default for x86_64. */
3105 if (TARGET_MACHO && TARGET_64BIT)
3108 /* Need to check -mtune=generic first. */
3109 if (ix86_tune_string)
3111 if (!strcmp (ix86_tune_string, "generic")
3112 || !strcmp (ix86_tune_string, "i686")
3113 /* As special support for cross compilers we read -mtune=native
3114 as -mtune=generic. With native compilers we won't see
3115 -mtune=native, as it will have been rewritten by the driver. */
3116 || !strcmp (ix86_tune_string, "native"))
3119 ix86_tune_string = "generic64";
3121 ix86_tune_string = "generic32";
3123 /* If this call is for setting the option attribute, allow the
3124 generic32/generic64 that was previously set. */
3125 else if (!main_args_p
3126 && (!strcmp (ix86_tune_string, "generic32")
3127 || !strcmp (ix86_tune_string, "generic64")))
3129 else if (!strncmp (ix86_tune_string, "generic", 7))
3130 error ("bad value (%s) for %stune=%s %s",
3131 ix86_tune_string, prefix, suffix, sw);
3132 else if (!strcmp (ix86_tune_string, "x86-64"))
3133 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3134 "%stune=k8%s or %stune=generic%s instead as appropriate",
3135 prefix, suffix, prefix, suffix, prefix, suffix);
3139 if (ix86_arch_string)
3140 ix86_tune_string = ix86_arch_string;
3141 if (!ix86_tune_string)
3143 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3144 ix86_tune_defaulted = 1;
3147 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3148 need to use a sensible tune option. */
3149 if (!strcmp (ix86_tune_string, "generic")
3150 || !strcmp (ix86_tune_string, "x86-64")
3151 || !strcmp (ix86_tune_string, "i686"))
3154 ix86_tune_string = "generic64";
3156 ix86_tune_string = "generic32";
3160 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3162 /* rep; movq isn't available in 32-bit code. */
3163 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3164 ix86_stringop_alg = no_stringop;
3167 if (!ix86_arch_string)
3168 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3170 ix86_arch_specified = 1;
3172 if (!global_options_set.x_ix86_abi)
3173 ix86_abi = DEFAULT_ABI;
3175 if (global_options_set.x_ix86_cmodel)
3177 switch (ix86_cmodel)
3182 ix86_cmodel = CM_SMALL_PIC;
3184 error ("code model %qs not supported in the %s bit mode",
3191 ix86_cmodel = CM_MEDIUM_PIC;
3193 error ("code model %qs not supported in the %s bit mode",
3195 else if (TARGET_X32)
3196 error ("code model %qs not supported in x32 mode",
3203 ix86_cmodel = CM_LARGE_PIC;
3205 error ("code model %qs not supported in the %s bit mode",
3207 else if (TARGET_X32)
3208 error ("code model %qs not supported in x32 mode",
3214 error ("code model %s does not support PIC mode", "32");
3216 error ("code model %qs not supported in the %s bit mode",
3223 error ("code model %s does not support PIC mode", "kernel");
3224 ix86_cmodel = CM_32;
3227 error ("code model %qs not supported in the %s bit mode",
3237 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3238 use of rip-relative addressing. This eliminates fixups that
3239 would otherwise be needed if this object is to be placed in a
3240 DLL, and is essentially just as efficient as direct addressing. */
3241 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3242 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3243 else if (TARGET_64BIT)
3244 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3246 ix86_cmodel = CM_32;
3248 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3250 error ("-masm=intel not supported in this configuration");
3251 ix86_asm_dialect = ASM_ATT;
3253 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3254 sorry ("%i-bit mode not compiled in",
3255 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3257 for (i = 0; i < pta_size; i++)
3258 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3260 ix86_schedule = processor_alias_table[i].schedule;
3261 ix86_arch = processor_alias_table[i].processor;
3262 /* Default cpu tuning to the architecture. */
3263 ix86_tune = ix86_arch;
3265 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3266 error ("CPU you selected does not support x86-64 "
3269 if (processor_alias_table[i].flags & PTA_MMX
3270 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3271 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3272 if (processor_alias_table[i].flags & PTA_3DNOW
3273 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3274 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3275 if (processor_alias_table[i].flags & PTA_3DNOW_A
3276 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3277 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3278 if (processor_alias_table[i].flags & PTA_SSE
3279 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3280 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3281 if (processor_alias_table[i].flags & PTA_SSE2
3282 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3283 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3284 if (processor_alias_table[i].flags & PTA_SSE3
3285 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3286 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3287 if (processor_alias_table[i].flags & PTA_SSSE3
3288 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3289 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3290 if (processor_alias_table[i].flags & PTA_SSE4_1
3291 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3292 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3293 if (processor_alias_table[i].flags & PTA_SSE4_2
3294 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3295 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3296 if (processor_alias_table[i].flags & PTA_AVX
3297 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3298 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3299 if (processor_alias_table[i].flags & PTA_AVX2
3300 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3301 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3302 if (processor_alias_table[i].flags & PTA_FMA
3303 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3304 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3305 if (processor_alias_table[i].flags & PTA_SSE4A
3306 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3307 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3308 if (processor_alias_table[i].flags & PTA_FMA4
3309 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3310 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3311 if (processor_alias_table[i].flags & PTA_XOP
3312 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3313 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3314 if (processor_alias_table[i].flags & PTA_LWP
3315 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3316 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3317 if (processor_alias_table[i].flags & PTA_ABM
3318 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3319 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3320 if (processor_alias_table[i].flags & PTA_BMI
3321 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3322 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3323 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3324 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3325 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3326 if (processor_alias_table[i].flags & PTA_TBM
3327 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3328 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3329 if (processor_alias_table[i].flags & PTA_BMI2
3330 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3331 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3332 if (processor_alias_table[i].flags & PTA_CX16
3333 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3334 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3335 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3336 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3337 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3338 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3339 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3340 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3341 if (processor_alias_table[i].flags & PTA_MOVBE
3342 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3343 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3344 if (processor_alias_table[i].flags & PTA_AES
3345 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3346 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3347 if (processor_alias_table[i].flags & PTA_PCLMUL
3348 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3349 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3350 if (processor_alias_table[i].flags & PTA_FSGSBASE
3351 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3352 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3353 if (processor_alias_table[i].flags & PTA_RDRND
3354 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3355 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3356 if (processor_alias_table[i].flags & PTA_F16C
3357 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3358 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3359 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3360 x86_prefetch_sse = true;
3365 if (!strcmp (ix86_arch_string, "generic"))
3366 error ("generic CPU can be used only for %stune=%s %s",
3367 prefix, suffix, sw);
3368 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3369 error ("bad value (%s) for %sarch=%s %s",
3370 ix86_arch_string, prefix, suffix, sw);
3372 ix86_arch_mask = 1u << ix86_arch;
3373 for (i = 0; i < X86_ARCH_LAST; ++i)
3374 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3376 for (i = 0; i < pta_size; i++)
3377 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3379 ix86_schedule = processor_alias_table[i].schedule;
3380 ix86_tune = processor_alias_table[i].processor;
3383 if (!(processor_alias_table[i].flags & PTA_64BIT))
3385 if (ix86_tune_defaulted)
3387 ix86_tune_string = "x86-64";
3388 for (i = 0; i < pta_size; i++)
3389 if (! strcmp (ix86_tune_string,
3390 processor_alias_table[i].name))
3392 ix86_schedule = processor_alias_table[i].schedule;
3393 ix86_tune = processor_alias_table[i].processor;
3396 error ("CPU you selected does not support x86-64 "
3402 /* Adjust tuning when compiling for 32-bit ABI. */
3405 case PROCESSOR_GENERIC64:
3406 ix86_tune = PROCESSOR_GENERIC32;
3407 ix86_schedule = CPU_PENTIUMPRO;
3410 case PROCESSOR_CORE2_64:
3411 ix86_tune = PROCESSOR_CORE2_32;
3414 case PROCESSOR_COREI7_64:
3415 ix86_tune = PROCESSOR_COREI7_32;
3422 /* Intel CPUs have always interpreted SSE prefetch instructions as
3423 NOPs; so, we can enable SSE prefetch instructions even when
3424 -mtune (rather than -march) points us to a processor that has them.
3425 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3426 higher processors. */
3428 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3429 x86_prefetch_sse = true;
3433 if (ix86_tune_specified && i == pta_size)
3434 error ("bad value (%s) for %stune=%s %s",
3435 ix86_tune_string, prefix, suffix, sw);
3437 ix86_tune_mask = 1u << ix86_tune;
3438 for (i = 0; i < X86_TUNE_LAST; ++i)
3439 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3441 #ifndef USE_IX86_FRAME_POINTER
3442 #define USE_IX86_FRAME_POINTER 0
3445 #ifndef USE_X86_64_FRAME_POINTER
3446 #define USE_X86_64_FRAME_POINTER 0
3449 /* Set the default values for switches whose default depends on TARGET_64BIT
3450 in case they weren't overwritten by command line options. */
3453 if (optimize > 1 && !global_options_set.x_flag_zee)
3455 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3456 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3457 if (flag_asynchronous_unwind_tables == 2)
3458 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3459 if (flag_pcc_struct_return == 2)
3460 flag_pcc_struct_return = 0;
3464 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3465 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3466 if (flag_asynchronous_unwind_tables == 2)
3467 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3468 if (flag_pcc_struct_return == 2)
3469 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3473 ix86_cost = &ix86_size_cost;
3475 ix86_cost = processor_target_table[ix86_tune].cost;
3477 /* Arrange to set up i386_stack_locals for all functions. */
3478 init_machine_status = ix86_init_machine_status;
3480 /* Validate -mregparm= value. */
3481 if (global_options_set.x_ix86_regparm)
3484 warning (0, "-mregparm is ignored in 64-bit mode");
3485 if (ix86_regparm > REGPARM_MAX)
3487 error ("-mregparm=%d is not between 0 and %d",
3488 ix86_regparm, REGPARM_MAX);
3493 ix86_regparm = REGPARM_MAX;
3495 /* Default align_* from the processor table. */
3496 if (align_loops == 0)
3498 align_loops = processor_target_table[ix86_tune].align_loop;
3499 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3501 if (align_jumps == 0)
3503 align_jumps = processor_target_table[ix86_tune].align_jump;
3504 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3506 if (align_functions == 0)
3508 align_functions = processor_target_table[ix86_tune].align_func;
3511 /* Provide default for -mbranch-cost= value. */
3512 if (!global_options_set.x_ix86_branch_cost)
3513 ix86_branch_cost = ix86_cost->branch_cost;
3517 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3519 /* Enable by default the SSE and MMX builtins. Do allow the user to
3520 explicitly disable any of these. In particular, disabling SSE and
3521 MMX for kernel code is extremely useful. */
3522 if (!ix86_arch_specified)
3524 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3525 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3528 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3532 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3534 if (!ix86_arch_specified)
3536 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3538 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3539 when the programmer takes care to keep the stack from being destroyed. */
3540 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3541 target_flags |= MASK_NO_RED_ZONE;
3544 /* Keep nonleaf frame pointers. */
3545 if (flag_omit_frame_pointer)
3546 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3547 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3548 flag_omit_frame_pointer = 1;
3550 /* If we're doing fast math, we don't care about comparison order
3551 wrt NaNs. This lets us use a shorter comparison sequence. */
3552 if (flag_finite_math_only)
3553 target_flags &= ~MASK_IEEE_FP;
3555 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3556 since the insns won't need emulation. */
3557 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3558 target_flags &= ~MASK_NO_FANCY_MATH_387;
3560 /* Likewise, if the target doesn't have a 387, or we've specified
3561 software floating point, don't use 387 inline intrinsics. */
3563 target_flags |= MASK_NO_FANCY_MATH_387;
3565 /* Turn on MMX builtins for -msse. */
3568 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3569 x86_prefetch_sse = true;
3572 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3573 if (TARGET_SSE4_2 || TARGET_ABM)
3574 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3576 /* Turn on lzcnt instruction for -mabm. */
3578 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
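/* So, for example, plain -msse4.2 implicitly turns on popcnt here, while
   -msse4.2 -mno-popcnt keeps it off, because bits the user set explicitly
   are masked out via ~ix86_isa_flags_explicit (illustrative reading of
   the statements above).  */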
3580 /* Validate -mpreferred-stack-boundary= value or default it to
3581 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3582 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3583 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3585 int min = (TARGET_64BIT ? 4 : 2);
3586 int max = (TARGET_SEH ? 4 : 12);
3588 if (ix86_preferred_stack_boundary_arg < min
3589 || ix86_preferred_stack_boundary_arg > max)
3592 error ("-mpreferred-stack-boundary is not supported "
3595 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3596 ix86_preferred_stack_boundary_arg, min, max);
3599 ix86_preferred_stack_boundary
3600 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
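/* Worked example (illustrative): -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte boundary,
   which is exactly the 64-bit minimum enforced by MIN above; the SEH
   maximum of 4 likewise pins 64-bit Windows at 16 bytes.  */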
3603 /* Set the default value for -mstackrealign. */
3604 if (ix86_force_align_arg_pointer == -1)
3605 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3607 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3609 /* Validate -mincoming-stack-boundary= value or default it to
3610 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3611 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3612 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3614 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3615 || ix86_incoming_stack_boundary_arg > 12)
3616 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3617 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3620 ix86_user_incoming_stack_boundary
3621 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3622 ix86_incoming_stack_boundary
3623 = ix86_user_incoming_stack_boundary;
3627 /* Accept -msseregparm only if at least SSE support is enabled. */
3628 if (TARGET_SSEREGPARM
3630 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3632 if (global_options_set.x_ix86_fpmath)
3634 if (ix86_fpmath & FPMATH_SSE)
3638 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3639 ix86_fpmath = FPMATH_387;
3641 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3643 warning (0, "387 instruction set disabled, using SSE arithmetics");
3644 ix86_fpmath = FPMATH_SSE;
3649 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3651 /* If the i387 is disabled, then do not return values in it. */
3653 target_flags &= ~MASK_FLOAT_RETURNS;
3655 /* Use external vectorized library in vectorizing intrinsics. */
3656 if (global_options_set.x_ix86_veclibabi_type)
3657 switch (ix86_veclibabi_type)
3659 case ix86_veclibabi_type_svml:
3660 ix86_veclib_handler = ix86_veclibabi_svml;
3663 case ix86_veclibabi_type_acml:
3664 ix86_veclib_handler = ix86_veclibabi_acml;
3671 if ((!USE_IX86_FRAME_POINTER
3672 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3673 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3675 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3677 /* ??? Unwind info is not correct around the CFG unless either a frame
3678 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3679 unwind info generation to be aware of the CFG and propagating states
3681 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3682 || flag_exceptions || flag_non_call_exceptions)
3683 && flag_omit_frame_pointer
3684 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3686 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3687 warning (0, "unwind tables currently require either a frame pointer "
3688 "or %saccumulate-outgoing-args%s for correctness",
3690 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3693 /* If stack probes are required, the space used for large function
3694 arguments on the stack must also be probed, so enable
3695 -maccumulate-outgoing-args so this happens in the prologue. */
3696 if (TARGET_STACK_PROBE
3697 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3699 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3700 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3701 "for correctness", prefix, suffix);
3702 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3705 /* For sane SSE instruction set generation we need the fcomi instruction.
3706 It is safe to enable all CMOVE instructions. Also, RDRAND intrinsic
3707 expands to a sequence that includes conditional move. */
3708 if (TARGET_SSE || TARGET_RDRND)
3711 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3714 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3715 p = strchr (internal_label_prefix, 'X');
3716 internal_label_prefix_len = p - internal_label_prefix;
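/* For example, on typical ELF targets ASM_GENERATE_INTERNAL_LABEL produces
   "*.LX0" here, so the computed prefix is "*.L" and
   internal_label_prefix_len is 3 (illustrative; the exact string is
   target-dependent).  */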
3720 /* When the scheduling description is not available, disable the scheduler pass
3721 so it won't slow down the compilation and make x87 code slower. */
3722 if (!TARGET_SCHEDULE)
3723 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3725 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3726 ix86_cost->simultaneous_prefetches,
3727 global_options.x_param_values,
3728 global_options_set.x_param_values);
3729 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3730 global_options.x_param_values,
3731 global_options_set.x_param_values);
3732 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3733 global_options.x_param_values,
3734 global_options_set.x_param_values);
3735 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3736 global_options.x_param_values,
3737 global_options_set.x_param_values);
3739 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3740 if (flag_prefetch_loop_arrays < 0
3743 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3744 flag_prefetch_loop_arrays = 1;
3746 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3747 can be optimized to ap = __builtin_next_arg (0). */
3748 if (!TARGET_64BIT && !flag_split_stack)
3749 targetm.expand_builtin_va_start = NULL;
3753 ix86_gen_leave = gen_leave_rex64;
3754 ix86_gen_add3 = gen_adddi3;
3755 ix86_gen_sub3 = gen_subdi3;
3756 ix86_gen_sub3_carry = gen_subdi3_carry;
3757 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3758 ix86_gen_monitor = gen_sse3_monitor64;
3759 ix86_gen_andsp = gen_anddi3;
3760 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3761 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3762 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3766 ix86_gen_leave = gen_leave;
3767 ix86_gen_add3 = gen_addsi3;
3768 ix86_gen_sub3 = gen_subsi3;
3769 ix86_gen_sub3_carry = gen_subsi3_carry;
3770 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3771 ix86_gen_monitor = gen_sse3_monitor;
3772 ix86_gen_andsp = gen_andsi3;
3773 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3774 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3775 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
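/* This indirection lets word-size-independent code emit the right pattern,
   e.g. (illustrative)
     emit_insn (ix86_gen_add3 (stack_pointer_rtx, stack_pointer_rtx, off));
   expands through gen_adddi3 in 64-bit mode and gen_addsi3 in 32-bit
   mode.  */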
3779 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3781 target_flags |= MASK_CLD & ~target_flags_explicit;
3784 if (!TARGET_64BIT && flag_pic)
3786 if (flag_fentry > 0)
3787 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3791 else if (TARGET_SEH)
3793 if (flag_fentry == 0)
3794 sorry ("-mno-fentry isn%'t compatible with SEH");
3797 else if (flag_fentry < 0)
3799 #if defined(PROFILE_BEFORE_PROLOGUE)
3808 /* When not optimizing for size, enable vzeroupper optimization for
3809 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3810 AVX unaligned load/store. */
3813 if (flag_expensive_optimizations
3814 && !(target_flags_explicit & MASK_VZEROUPPER))
3815 target_flags |= MASK_VZEROUPPER;
3816 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3817 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3818 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3819 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3820 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3821 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3822 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3823 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3824 target_flags |= MASK_PREFER_AVX128;
3829 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3830 target_flags &= ~MASK_VZEROUPPER;
3833 if (ix86_recip_name)
3835 char *p = ASTRDUP (ix86_recip_name);
3837 unsigned int mask, i;
3840 while ((q = strtok (p, ",")) != NULL)
3851 if (!strcmp (q, "default"))
3852 mask = RECIP_MASK_ALL;
3855 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
3856 if (!strcmp (q, recip_options[i].string))
3858 mask = recip_options[i].mask;
3862 if (i == ARRAY_SIZE (recip_options))
3864 error ("unknown option for -mrecip=%s", q);
3866 mask = RECIP_MASK_NONE;
3870 recip_mask_explicit |= mask;
3872 recip_mask &= ~mask;
3879 recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
3880 else if (target_flags_explicit & MASK_RECIP)
3881 recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
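/* Example (illustrative, assuming the documented "!" inversion syntax of
   -mrecip): -mrecip=default,!sqrt first ORs in RECIP_MASK_ALL via the
   "default" keyword, then the inverted "sqrt" entry takes the
   recip_mask &= ~mask arm above and clears the square-root bits again.  */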
3883 /* Save the initial options in case the user sets function-specific options. */
3886 target_option_default_node = target_option_current_node
3887 = build_target_option_node ();
3890 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
3893 function_pass_avx256_p (const_rtx val)
3898 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3901 if (GET_CODE (val) == PARALLEL)
3906 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3908 r = XVECEXP (val, 0, i);
3909 if (GET_CODE (r) == EXPR_LIST
3911 && REG_P (XEXP (r, 0))
3912 && (GET_MODE (XEXP (r, 0)) == OImode
3913 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
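/* Example of a PARALLEL matched here (illustrative): a value returned
   partly in an AVX register looks like
     (parallel [(expr_list (reg:V8SF xmm0) (const_int 0))])
   where the inner register has a 256-bit vector mode such as V8SF, so
   the loop reports true.  */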
3921 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3924 ix86_option_override (void)
3926 ix86_option_override_internal (true);
3929 /* Update register usage after having seen the compiler flags. */
3932 ix86_conditional_register_usage (void)
3937 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3939 if (fixed_regs[i] > 1)
3940 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3941 if (call_used_regs[i] > 1)
3942 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3945 /* The PIC register, if it exists, is fixed. */
3946 j = PIC_OFFSET_TABLE_REGNUM;
3947 if (j != INVALID_REGNUM)
3948 fixed_regs[j] = call_used_regs[j] = 1;
3950 /* The 64-bit MS_ABI changes the set of call-used registers. */
3951 if (TARGET_64BIT_MS_ABI)
3953 call_used_regs[SI_REG] = 0;
3954 call_used_regs[DI_REG] = 0;
3955 call_used_regs[XMM6_REG] = 0;
3956 call_used_regs[XMM7_REG] = 0;
3957 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3958 call_used_regs[i] = 0;
3961 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3962 other call-clobbered regs for 64-bit. */
3965 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3967 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3968 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3969 && call_used_regs[i])
3970 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3973 /* If MMX is disabled, squash the registers. */
3975 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3976 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3977 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3979 /* If SSE is disabled, squash the registers. */
3981 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3982 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3983 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3985 /* If the FPU is disabled, squash the registers. */
3986 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3987 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3988 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3989 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3991 /* If 32-bit, squash the 64-bit registers. */
3994 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3996 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4002 /* Save the current options */
4005 ix86_function_specific_save (struct cl_target_option *ptr)
4007 ptr->arch = ix86_arch;
4008 ptr->schedule = ix86_schedule;
4009 ptr->tune = ix86_tune;
4010 ptr->branch_cost = ix86_branch_cost;
4011 ptr->tune_defaulted = ix86_tune_defaulted;
4012 ptr->arch_specified = ix86_arch_specified;
4013 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4014 ptr->ix86_target_flags_explicit = target_flags_explicit;
4015 ptr->x_recip_mask_explicit = recip_mask_explicit;
4017 /* The fields are char but the variables are not; make sure the
4018 values fit in the fields. */
4019 gcc_assert (ptr->arch == ix86_arch);
4020 gcc_assert (ptr->schedule == ix86_schedule);
4021 gcc_assert (ptr->tune == ix86_tune);
4022 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4025 /* Restore the current options */
4028 ix86_function_specific_restore (struct cl_target_option *ptr)
4030 enum processor_type old_tune = ix86_tune;
4031 enum processor_type old_arch = ix86_arch;
4032 unsigned int ix86_arch_mask, ix86_tune_mask;
4035 ix86_arch = (enum processor_type) ptr->arch;
4036 ix86_schedule = (enum attr_cpu) ptr->schedule;
4037 ix86_tune = (enum processor_type) ptr->tune;
4038 ix86_branch_cost = ptr->branch_cost;
4039 ix86_tune_defaulted = ptr->tune_defaulted;
4040 ix86_arch_specified = ptr->arch_specified;
4041 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4042 target_flags_explicit = ptr->ix86_target_flags_explicit;
4043 recip_mask_explicit = ptr->x_recip_mask_explicit;
4045 /* Recreate the arch feature tests if the arch changed */
4046 if (old_arch != ix86_arch)
4048 ix86_arch_mask = 1u << ix86_arch;
4049 for (i = 0; i < X86_ARCH_LAST; ++i)
4050 ix86_arch_features[i]
4051 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4054 /* Recreate the tune optimization tests */
4055 if (old_tune != ix86_tune)
4057 ix86_tune_mask = 1u << ix86_tune;
4058 for (i = 0; i < X86_TUNE_LAST; ++i)
4059 ix86_tune_features[i]
4060 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4064 /* Print the current options */
4067 ix86_function_specific_print (FILE *file, int indent,
4068 struct cl_target_option *ptr)
4071 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4072 NULL, NULL, ptr->x_ix86_fpmath, false);
4074 fprintf (file, "%*sarch = %d (%s)\n",
4077 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4078 ? cpu_names[ptr->arch]
4081 fprintf (file, "%*stune = %d (%s)\n",
4084 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4085 ? cpu_names[ptr->tune]
4088 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4092 fprintf (file, "%*s%s\n", indent, "", target_string);
4093 free (target_string);
4098 /* Inner function to process the attribute((target(...))); take an argument and
4099 set the current options from the argument. If we have a list, recursively go over the list. */
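/* For illustration, a typical argument string (hypothetical user code):

     int foo (int) __attribute__ ((target ("sse4.2,no-avx,arch=core2,fpmath=sse")));

   "sse4.2" and "no-avx" match IX86_ATTR_ISA entries (the "no-" prefix
   flips opt_set_p below), "arch=" is an ix86_opt_str option and
   "fpmath=" an ix86_opt_enum one.  */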
4103 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4104 struct gcc_options *enum_opts_set)
4109 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4110 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4111 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4112 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4113 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4129 enum ix86_opt_type type;
4134 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4135 IX86_ATTR_ISA ("abm", OPT_mabm),
4136 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4137 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4138 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4139 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4140 IX86_ATTR_ISA ("aes", OPT_maes),
4141 IX86_ATTR_ISA ("avx", OPT_mavx),
4142 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4143 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4144 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4145 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4146 IX86_ATTR_ISA ("sse", OPT_msse),
4147 IX86_ATTR_ISA ("sse2", OPT_msse2),
4148 IX86_ATTR_ISA ("sse3", OPT_msse3),
4149 IX86_ATTR_ISA ("sse4", OPT_msse4),
4150 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4151 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4152 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4153 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4154 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4155 IX86_ATTR_ISA ("fma", OPT_mfma),
4156 IX86_ATTR_ISA ("xop", OPT_mxop),
4157 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4158 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4159 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4160 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4163 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4165 /* string options */
4166 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4167 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4170 IX86_ATTR_YES ("cld",
4174 IX86_ATTR_NO ("fancy-math-387",
4175 OPT_mfancy_math_387,
4176 MASK_NO_FANCY_MATH_387),
4178 IX86_ATTR_YES ("ieee-fp",
4182 IX86_ATTR_YES ("inline-all-stringops",
4183 OPT_minline_all_stringops,
4184 MASK_INLINE_ALL_STRINGOPS),
4186 IX86_ATTR_YES ("inline-stringops-dynamically",
4187 OPT_minline_stringops_dynamically,
4188 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4190 IX86_ATTR_NO ("align-stringops",
4191 OPT_mno_align_stringops,
4192 MASK_NO_ALIGN_STRINGOPS),
4194 IX86_ATTR_YES ("recip",
4200 /* If this is a list, recurse to get the options. */
4201 if (TREE_CODE (args) == TREE_LIST)
4205 for (; args; args = TREE_CHAIN (args))
4206 if (TREE_VALUE (args)
4207 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4208 p_strings, enum_opts_set))
4214 else if (TREE_CODE (args) != STRING_CST)
4217 /* Handle multiple arguments separated by commas. */
4218 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4220 while (next_optstr && *next_optstr != '\0')
4222 char *p = next_optstr;
4224 char *comma = strchr (next_optstr, ',');
4225 const char *opt_string;
4226 size_t len, opt_len;
4231 enum ix86_opt_type type = ix86_opt_unknown;
4237 len = comma - next_optstr;
4238 next_optstr = comma + 1;
4246 /* Recognize no-xxx. */
4247 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4256 /* Find the option. */
4259 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4261 type = attrs[i].type;
4262 opt_len = attrs[i].len;
4263 if (ch == attrs[i].string[0]
4264 && ((type != ix86_opt_str && type != ix86_opt_enum)
4267 && memcmp (p, attrs[i].string, opt_len) == 0)
4270 mask = attrs[i].mask;
4271 opt_string = attrs[i].string;
4276 /* Process the option. */
4279 error ("attribute(target(\"%s\")) is unknown", orig_p);
4283 else if (type == ix86_opt_isa)
4285 struct cl_decoded_option decoded;
4287 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4288 ix86_handle_option (&global_options, &global_options_set,
4289 &decoded, input_location);
4292 else if (type == ix86_opt_yes || type == ix86_opt_no)
4294 if (type == ix86_opt_no)
4295 opt_set_p = !opt_set_p;
4298 target_flags |= mask;
4300 target_flags &= ~mask;
4303 else if (type == ix86_opt_str)
4307 error ("option(\"%s\") was already specified", opt_string);
4311 p_strings[opt] = xstrdup (p + opt_len);
4314 else if (type == ix86_opt_enum)
4319 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4321 set_option (&global_options, enum_opts_set, opt, value,
4322 p + opt_len, DK_UNSPECIFIED, input_location,
4326 error ("attribute(target(\"%s\")) is unknown", orig_p);
4338 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4341 ix86_valid_target_attribute_tree (tree args)
4343 const char *orig_arch_string = ix86_arch_string;
4344 const char *orig_tune_string = ix86_tune_string;
4345 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4346 int orig_tune_defaulted = ix86_tune_defaulted;
4347 int orig_arch_specified = ix86_arch_specified;
4348 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4351 struct cl_target_option *def
4352 = TREE_TARGET_OPTION (target_option_default_node);
4353 struct gcc_options enum_opts_set;
4355 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4357 /* Process each of the options on the chain. */
4358 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4362 /* If the changed options are different from the default, rerun
4363 ix86_option_override_internal, and then save the options away.
4364 The string options are attribute options, and will be undone
4365 when we copy the save structure. */
4366 if (ix86_isa_flags != def->x_ix86_isa_flags
4367 || target_flags != def->x_target_flags
4368 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4369 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4370 || enum_opts_set.x_ix86_fpmath)
4372 /* If we are using the default tune= or arch=, undo the string assigned,
4373 and use the default. */
4374 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4375 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4376 else if (!orig_arch_specified)
4377 ix86_arch_string = NULL;
4379 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4380 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4381 else if (orig_tune_defaulted)
4382 ix86_tune_string = NULL;
4384 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4385 if (enum_opts_set.x_ix86_fpmath)
4386 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4387 else if (!TARGET_64BIT && TARGET_SSE)
4389 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4390 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4393 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4394 ix86_option_override_internal (false);
4396 /* Add any builtin functions with the new isa if any. */
4397 ix86_add_new_builtins (ix86_isa_flags);
4399 /* Save the current options unless we are validating options for
4401 t = build_target_option_node ();
4403 ix86_arch_string = orig_arch_string;
4404 ix86_tune_string = orig_tune_string;
4405 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4407 /* Free up memory allocated to hold the strings */
4408 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4409 free (option_strings[i]);
4415 /* Hook to validate attribute((target("string"))). */
4418 ix86_valid_target_attribute_p (tree fndecl,
4419 tree ARG_UNUSED (name),
4421 int ARG_UNUSED (flags))
4423 struct cl_target_option cur_target;
4425 tree old_optimize = build_optimization_node ();
4426 tree new_target, new_optimize;
4427 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4429 /* If the function changed the optimization levels as well as setting target
4430 options, start with the optimizations specified. */
4431 if (func_optimize && func_optimize != old_optimize)
4432 cl_optimization_restore (&global_options,
4433 TREE_OPTIMIZATION (func_optimize));
4435 /* The target attributes may also change some optimization flags, so update
4436 the optimization options if necessary. */
4437 cl_target_option_save (&cur_target, &global_options);
4438 new_target = ix86_valid_target_attribute_tree (args);
4439 new_optimize = build_optimization_node ();
4446 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4448 if (old_optimize != new_optimize)
4449 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4452 cl_target_option_restore (&global_options, &cur_target);
4454 if (old_optimize != new_optimize)
4455 cl_optimization_restore (&global_options,
4456 TREE_OPTIMIZATION (old_optimize));
4462 /* Hook to determine if one function can safely inline another. */
4465 ix86_can_inline_p (tree caller, tree callee)
4468 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4469 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4471 /* If callee has no option attributes, then it is ok to inline. */
4475 /* If the caller has no option attributes but the callee does, then it is not ok to
4477 else if (!caller_tree)
4482 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4483 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4485 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4486 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4488 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4489 != callee_opts->x_ix86_isa_flags)
4492 /* See if we have the same non-isa options. */
4493 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4496 /* See if arch, tune, etc. are the same. */
4497 else if (caller_opts->arch != callee_opts->arch)
4500 else if (caller_opts->tune != callee_opts->tune)
4503 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4506 else if (caller_opts->branch_cost != callee_opts->branch_cost)
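/* Concrete example of the ISA-subset rule above (illustrative): a caller
   compiled with attribute target("sse4.2") may inline a callee built with
   target("sse2"), since the callee's ISA flags are a subset of the
   caller's, but not vice versa.  */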
4517 /* Remember the last target of ix86_set_current_function. */
4518 static GTY(()) tree ix86_previous_fndecl;
4520 /* Establish appropriate back-end context for processing the function
4521 FNDECL. The argument might be NULL to indicate processing at top
4522 level, outside of any function scope. */
4524 ix86_set_current_function (tree fndecl)
4526 /* Only change the context if the function changes. This hook is called
4527 several times in the course of compiling a function, and we don't want to
4528 slow things down too much or call target_reinit when it isn't safe. */
4529 if (fndecl && fndecl != ix86_previous_fndecl)
4531 tree old_tree = (ix86_previous_fndecl
4532 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4535 tree new_tree = (fndecl
4536 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4539 ix86_previous_fndecl = fndecl;
4540 if (old_tree == new_tree)
4545 cl_target_option_restore (&global_options,
4546 TREE_TARGET_OPTION (new_tree));
4552 struct cl_target_option *def
4553 = TREE_TARGET_OPTION (target_option_current_node);
4555 cl_target_option_restore (&global_options, def);
4562 /* Return true if this goes in large data/bss. */
4565 ix86_in_large_data_p (tree exp)
4567 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4570 /* Functions are never large data. */
4571 if (TREE_CODE (exp) == FUNCTION_DECL)
4574 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4576 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4577 if (strcmp (section, ".ldata") == 0
4578 || strcmp (section, ".lbss") == 0)
4584 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4586 /* If this is an incomplete type with size 0, then we can't put it
4587 in data because it might be too big when completed. */
4588 if (!size || size > ix86_section_threshold)
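/* Example (illustrative): with -mcmodel=medium and the default
   -mlarge-data-threshold=65536, an object such as
     static char big_buf[1 << 20];
   exceeds ix86_section_threshold and is treated as large data, while
   small objects keep using the normal .data/.bss sections.  */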
4595 /* Switch to the appropriate section for output of DECL.
4596 DECL is either a `VAR_DECL' node or a constant of some sort.
4597 RELOC indicates whether forming the initial value of DECL requires
4598 link-time relocations. */
4600 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4604 x86_64_elf_select_section (tree decl, int reloc,
4605 unsigned HOST_WIDE_INT align)
4607 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4608 && ix86_in_large_data_p (decl))
4610 const char *sname = NULL;
4611 unsigned int flags = SECTION_WRITE;
4612 switch (categorize_decl_for_section (decl, reloc))
4617 case SECCAT_DATA_REL:
4618 sname = ".ldata.rel";
4620 case SECCAT_DATA_REL_LOCAL:
4621 sname = ".ldata.rel.local";
4623 case SECCAT_DATA_REL_RO:
4624 sname = ".ldata.rel.ro";
4626 case SECCAT_DATA_REL_RO_LOCAL:
4627 sname = ".ldata.rel.ro.local";
4631 flags |= SECTION_BSS;
4634 case SECCAT_RODATA_MERGE_STR:
4635 case SECCAT_RODATA_MERGE_STR_INIT:
4636 case SECCAT_RODATA_MERGE_CONST:
4640 case SECCAT_SRODATA:
4647 /* We don't split these for the medium model. Place them into
4648 default sections and hope for the best. */
4653 /* We might get called with string constants, but get_named_section
4654 doesn't like them as they are not DECLs. Also, we need to set
4655 flags in that case. */
4657 return get_section (sname, flags, NULL);
4658 return get_named_section (decl, sname, reloc);
4661 return default_elf_select_section (decl, reloc, align);
4664 /* Build up a unique section name, expressed as a
4665 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4666 RELOC indicates whether the initial value of DECL requires
4667 link-time relocations. */
4669 static void ATTRIBUTE_UNUSED
4670 x86_64_elf_unique_section (tree decl, int reloc)
4672 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4673 && ix86_in_large_data_p (decl))
4675 const char *prefix = NULL;
4676 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4677 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4679 switch (categorize_decl_for_section (decl, reloc))
4682 case SECCAT_DATA_REL:
4683 case SECCAT_DATA_REL_LOCAL:
4684 case SECCAT_DATA_REL_RO:
4685 case SECCAT_DATA_REL_RO_LOCAL:
4686 prefix = one_only ? ".ld" : ".ldata";
4689 prefix = one_only ? ".lb" : ".lbss";
4692 case SECCAT_RODATA_MERGE_STR:
4693 case SECCAT_RODATA_MERGE_STR_INIT:
4694 case SECCAT_RODATA_MERGE_CONST:
4695 prefix = one_only ? ".lr" : ".lrodata";
4697 case SECCAT_SRODATA:
4704 /* We don't split these for the medium model. Place them into
4705 default sections and hope for the best. */
4710 const char *name, *linkonce;
4713 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4714 name = targetm.strip_name_encoding (name);
4716 /* If we're using one_only, then there needs to be a .gnu.linkonce
4717 prefix to the section name. */
4718 linkonce = one_only ? ".gnu.linkonce" : "";
4720 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4722 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4726 default_unique_section (decl, reloc);
4729 #ifdef COMMON_ASM_OP
4730 /* This says how to output assembler code to declare an
4731 uninitialized external linkage data object.
4733 For medium-model x86-64 we need to use the .largecomm directive for
4736 x86_elf_aligned_common (FILE *file,
4737 const char *name, unsigned HOST_WIDE_INT size,
4740 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4741 && size > (unsigned int)ix86_section_threshold)
4742 fputs (".largecomm\t", file);
4744 fputs (COMMON_ASM_OP, file);
4745 assemble_name (file, name);
4746 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4747 size, align / BITS_PER_UNIT);
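/* For a 100000-byte object aligned to 32 bytes in the medium model this
   emits, illustratively:
     .largecomm big_buf,100000,32
   whereas small objects still go through COMMON_ASM_OP (".comm").  */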
4751 /* Utility function for targets to use in implementing
4752 ASM_OUTPUT_ALIGNED_BSS. */
4755 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4756 const char *name, unsigned HOST_WIDE_INT size,
4759 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4760 && size > (unsigned int)ix86_section_threshold)
4761 switch_to_section (get_named_section (decl, ".lbss", 0));
4763 switch_to_section (bss_section);
4764 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4765 #ifdef ASM_DECLARE_OBJECT_NAME
4766 last_assemble_variable_decl = decl;
4767 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4769 /* Standard thing is just to output a label for the object. */
4770 ASM_OUTPUT_LABEL (file, name);
4771 #endif /* ASM_DECLARE_OBJECT_NAME */
4772 ASM_OUTPUT_SKIP (file, size ? size : 1);
4775 /* Decide whether we must probe the stack before any space allocation
4776 on this target. It's essentially TARGET_STACK_PROBE except when
4777 -fstack-check causes the stack to be already probed differently. */
4780 ix86_target_stack_probe (void)
4782 /* Do not probe the stack twice if static stack checking is enabled. */
4783 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4786 return TARGET_STACK_PROBE;
4789 /* Decide whether we can make a sibling call to a function. DECL is the
4790 declaration of the function being targeted by the call and EXP is the
4791 CALL_EXPR representing the call. */
4794 ix86_function_ok_for_sibcall (tree decl, tree exp)
4796 tree type, decl_or_type;
4799 /* If we are generating position-independent code, we cannot sibcall
4800 optimize any indirect call, or a direct call to a global function,
4801 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4805 && (!decl || !targetm.binds_local_p (decl)))
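/* Example (illustrative): in 32-bit PIC code,
     extern int bar (int);
     int foo (int x) { return bar (x); }
   cannot be compiled as a sibcall, since the call to the global bar goes
   through the PLT and %ebx must still hold the GOT pointer at that
   point.  */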
4808 /* If we need to align the outgoing stack, then sibcalling would
4809 unalign the stack, which may break the called function. */
4810 if (ix86_minimum_incoming_stack_boundary (true)
4811 < PREFERRED_STACK_BOUNDARY)
4816 decl_or_type = decl;
4817 type = TREE_TYPE (decl);
4821 /* We're looking at the CALL_EXPR, we need the type of the function. */
4822 type = CALL_EXPR_FN (exp); /* pointer expression */
4823 type = TREE_TYPE (type); /* pointer type */
4824 type = TREE_TYPE (type); /* function type */
4825 decl_or_type = type;
4828 /* Check that the return value locations are the same. For example,
4829 if we are returning floats on the 80387 register stack, we cannot
4830 make a sibcall from a function that doesn't return a float to a
4831 function that does or, conversely, from a function that does return
4832 a float to a function that doesn't; the necessary stack adjustment
4833 would not be executed. This is also the place we notice
4834 differences in the return value ABI. Note that it is ok for one
4835 of the functions to have void return type as long as the return
4836 value of the other is passed in a register. */
4837 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4838 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4840 if (STACK_REG_P (a) || STACK_REG_P (b))
4842 if (!rtx_equal_p (a, b))
4845 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4847 /* Disable sibcall if we need to generate vzeroupper after
4849 if (TARGET_VZEROUPPER
4850 && cfun->machine->callee_return_avx256_p
4851 && !cfun->machine->caller_return_avx256_p)
4854 else if (!rtx_equal_p (a, b))
4859 /* The SYSV ABI has more call-clobbered registers;
4860 disallow sibcalls from MS to SYSV. */
4861 if (cfun->machine->call_abi == MS_ABI
4862 && ix86_function_type_abi (type) == SYSV_ABI)
4867 /* If this call is indirect, we'll need to be able to use a
4868 call-clobbered register for the address of the target function.
4869 Make sure that all such registers are not used for passing
4870 parameters. Note that DLLIMPORT functions are indirect. */
4872 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4874 if (ix86_function_regparm (type, NULL) >= 3)
4876 /* ??? Need to count the actual number of registers to be used,
4877 not the possible number of registers. Fix later. */
4883 /* Otherwise okay. That also includes certain types of indirect calls. */
4887 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4888 and "sseregparm" calling convention attributes;
4889 arguments as in struct attribute_spec.handler. */
4892 ix86_handle_cconv_attribute (tree *node, tree name,
4894 int flags ATTRIBUTE_UNUSED,
4897 if (TREE_CODE (*node) != FUNCTION_TYPE
4898 && TREE_CODE (*node) != METHOD_TYPE
4899 && TREE_CODE (*node) != FIELD_DECL
4900 && TREE_CODE (*node) != TYPE_DECL)
4902 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4904 *no_add_attrs = true;
4908 /* Can combine regparm with all attributes but fastcall and thiscall. */
4909 if (is_attribute_p ("regparm", name))
4913 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4915 error ("fastcall and regparm attributes are not compatible");
4918 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4920 error ("regparam and thiscall attributes are not compatible");
4923 cst = TREE_VALUE (args);
4924 if (TREE_CODE (cst) != INTEGER_CST)
4926 warning (OPT_Wattributes,
4927 "%qE attribute requires an integer constant argument",
4929 *no_add_attrs = true;
4931 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4933 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4935 *no_add_attrs = true;
4943 /* Do not warn when emulating the MS ABI. */
4944 if ((TREE_CODE (*node) != FUNCTION_TYPE
4945 && TREE_CODE (*node) != METHOD_TYPE)
4946 || ix86_function_type_abi (*node) != MS_ABI)
4947 warning (OPT_Wattributes, "%qE attribute ignored",
4949 *no_add_attrs = true;
4953 /* fastcall combines only with sseregparm; it conflicts with cdecl, stdcall, regparm and thiscall. */
4954 if (is_attribute_p ("fastcall", name))
4956 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4958 error ("fastcall and cdecl attributes are not compatible");
4960 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4962 error ("fastcall and stdcall attributes are not compatible");
4964 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4966 error ("fastcall and regparm attributes are not compatible");
4968 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4970 error ("fastcall and thiscall attributes are not compatible");
4974 /* Can combine stdcall with regparm and sseregparm; it conflicts with cdecl, fastcall and thiscall. */
4976 else if (is_attribute_p ("stdcall", name))
4978 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4980 error ("stdcall and cdecl attributes are not compatible");
4982 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4984 error ("stdcall and fastcall attributes are not compatible");
4986 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4988 error ("stdcall and thiscall attributes are not compatible");
4992 /* Can combine cdecl with regparm and sseregparm. */
4993 else if (is_attribute_p ("cdecl", name))
4995 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4997 error ("stdcall and cdecl attributes are not compatible");
4999 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5001 error ("fastcall and cdecl attributes are not compatible");
5003 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5005 error ("cdecl and thiscall attributes are not compatible");
5008 else if (is_attribute_p ("thiscall", name))
5010 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5011 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5013 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5015 error ("stdcall and thiscall attributes are not compatible");
5017 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5019 error ("fastcall and thiscall attributes are not compatible");
5021 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5023 error ("cdecl and thiscall attributes are not compatible");
5027 /* Can combine sseregparm with all attributes. */
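/* Example of the checks above (hypothetical user code):
     void __attribute__ ((fastcall, regparm (3))) f (int);   rejected
     void __attribute__ ((stdcall, sseregparm)) g (double);  accepted
   sseregparm is the only attribute that combines with any of the
   others.  */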
5032 /* This function determines the calling convention from TYPE. */
5035 ix86_get_callcvt (const_tree type)
5037 unsigned int ret = 0;
5042 return IX86_CALLCVT_CDECL;
5044 attrs = TYPE_ATTRIBUTES (type);
5045 if (attrs != NULL_TREE)
5047 if (lookup_attribute ("cdecl", attrs))
5048 ret |= IX86_CALLCVT_CDECL;
5049 else if (lookup_attribute ("stdcall", attrs))
5050 ret |= IX86_CALLCVT_STDCALL;
5051 else if (lookup_attribute ("fastcall", attrs))
5052 ret |= IX86_CALLCVT_FASTCALL;
5053 else if (lookup_attribute ("thiscall", attrs))
5054 ret |= IX86_CALLCVT_THISCALL;
5056 /* Regparm isn't allowed for thiscall and fastcall. */
5057 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5059 if (lookup_attribute ("regparm", attrs))
5060 ret |= IX86_CALLCVT_REGPARM;
5061 if (lookup_attribute ("sseregparm", attrs))
5062 ret |= IX86_CALLCVT_SSEREGPARM;
5065 if (IX86_BASE_CALLCVT(ret) != 0)
5069 is_stdarg = stdarg_p (type);
5070 if (TARGET_RTD && !is_stdarg)
5071 return IX86_CALLCVT_STDCALL | ret;
5075 || TREE_CODE (type) != METHOD_TYPE
5076 || ix86_function_type_abi (type) != MS_ABI)
5077 return IX86_CALLCVT_CDECL | ret;
5079 return IX86_CALLCVT_THISCALL;
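/* E.g. under -mrtd a plain prototyped function defaults to stdcall here,
   while a variadic one keeps the cdecl convention, since the caller must
   pop a variable argument list (illustrative).  */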
5082 /* Return 0 if the attributes for two types are incompatible, 1 if they
5083 are compatible, and 2 if they are nearly compatible (which causes a
5084 warning to be generated). */
5087 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5089 unsigned int ccvt1, ccvt2;
5091 if (TREE_CODE (type1) != FUNCTION_TYPE
5092 && TREE_CODE (type1) != METHOD_TYPE)
5095 ccvt1 = ix86_get_callcvt (type1);
5096 ccvt2 = ix86_get_callcvt (type2);
5099 if (ix86_function_regparm (type1, NULL)
5100 != ix86_function_regparm (type2, NULL))
5106 /* Return the regparm value for a function with the indicated TYPE and DECL.
5107 DECL may be NULL when calling the function indirectly
5108 or considering a libcall. */
5111 ix86_function_regparm (const_tree type, const_tree decl)
5118 return (ix86_function_type_abi (type) == SYSV_ABI
5119 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5120 ccvt = ix86_get_callcvt (type);
5121 regparm = ix86_regparm;
5123 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5125 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5128 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5132 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5134 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5137 /* Use register calling convention for local functions when possible. */
5139 && TREE_CODE (decl) == FUNCTION_DECL
5141 && !(profile_flag && !flag_fentry))
5143 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5144 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5145 if (i && i->local && i->can_change_signature)
5147 int local_regparm, globals = 0, regno;
5149 /* Make sure no regparm register is taken by a
5150 fixed register variable. */
5151 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5152 if (fixed_regs[local_regparm])
5155 /* We don't want to use regparm(3) for nested functions as
5156 these use a static chain pointer in the third argument. */
5157 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5160 /* In 32-bit mode save a register for the split stack. */
5161 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5164 /* Each fixed register usage increases register pressure,
5165 so fewer registers should be used for argument passing.
5166 This functionality can be overridden by an explicit
5168 for (regno = 0; regno <= DI_REG; regno++)
5169 if (fixed_regs[regno])
5173 = globals < local_regparm ? local_regparm - globals : 0;
5175 if (local_regparm > regparm)
5176 regparm = local_regparm;
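/* Illustrative net effect: at -O2 a file-local function whose signature
   may change can pass its first three integer arguments in %eax, %edx
   and %ecx, minus one register for every global fixed-register variable
   counted above, and minus the static-chain and split-stack
   reservations.  */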
5183 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5184 DFmode (2) arguments in SSE registers for a function with the
5185 indicated TYPE and DECL. DECL may be NULL when calling the function
5186 indirectly or considering a libcall. Otherwise return 0. */
5189 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5191 gcc_assert (!TARGET_64BIT);
5193 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5194 by the sseregparm attribute. */
5195 if (TARGET_SSEREGPARM
5196 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5203 error ("calling %qD with attribute sseregparm without "
5204 "SSE/SSE2 enabled", decl);
5206 error ("calling %qT with attribute sseregparm without "
5207 "SSE/SSE2 enabled", type);
5215 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5216 (and DFmode for SSE2) arguments in SSE registers. */
5217 if (decl && TARGET_SSE_MATH && optimize
5218 && !(profile_flag && !flag_fentry))
5220 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5221 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5222 if (i && i->local && i->can_change_signature)
5223 return TARGET_SSE2 ? 2 : 1;
5229 /* Return true if EAX is live at the start of the function. Used by
5230 ix86_expand_prologue to determine if we need special help before
5231 calling allocate_stack_worker. */
5234 ix86_eax_live_at_start_p (void)
5236 /* Cheat. Don't bother working forward from ix86_function_regparm
5237 to the function type to whether an actual argument is located in
5238 eax. Instead just look at cfg info, which is still close enough
5239 to correct at this point. This gives false positives for broken
5240 functions that might use uninitialized data that happens to be
5241 allocated in eax, but who cares? */
5242 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5246 ix86_keep_aggregate_return_pointer (tree fntype)
5252 attr = lookup_attribute ("callee_pop_aggregate_return",
5253 TYPE_ATTRIBUTES (fntype));
5255 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5257 /* For 32-bit MS-ABI the default is to keep the aggregate return pointer. */
5259 if (ix86_function_type_abi (fntype) == MS_ABI)
5262 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5265 /* Value is the number of bytes of arguments automatically
5266 popped when returning from a subroutine call.
5267 FUNDECL is the declaration node of the function (as a tree),
5268 FUNTYPE is the data type of the function (as a tree),
5269 or for a library call it is an identifier node for the subroutine name.
5270 SIZE is the number of bytes of arguments passed on the stack.
5272 On the 80386, the RTD insn may be used to pop them if the number
5273 of args is fixed, but if the number is variable then the caller
5274 must pop them all. RTD can't be used for library calls now
5275 because the library is compiled with the Unix compiler.
5276 Use of RTD is a selectable option, since it is incompatible with
5277 standard Unix calling sequences. If the option is not selected,
5278 the caller must always pop the args.
5280 The attribute stdcall is equivalent to RTD on a per module basis. */
5283 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5287 /* None of the 64-bit ABIs pop arguments. */
5291 ccvt = ix86_get_callcvt (funtype);
5293 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5294 | IX86_CALLCVT_THISCALL)) != 0
5295 && ! stdarg_p (funtype))
5298 /* Lose any fake structure return argument if it is passed on the stack. */
5299 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5300 && !ix86_keep_aggregate_return_pointer (funtype))
5302 int nregs = ix86_function_regparm (funtype, fundecl);
5304 return GET_MODE_SIZE (Pmode);
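/* Worked example (illustrative): for
     void __attribute__ ((stdcall)) f (int, int, int);
   SIZE is 12 and f returns with "ret $12"; a cdecl function returning a
   large aggregate pops only the hidden return-slot pointer,
   GET_MODE_SIZE (Pmode) == 4 bytes in 32-bit mode, and that only when no
   register arguments are in use.  */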
5310 /* Argument support functions. */
5312 /* Return true when register REGNO may be used to pass function parameters. */
5314 ix86_function_arg_regno_p (int regno)
5317 const int *parm_regs;
5322 return (regno < REGPARM_MAX
5323 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5325 return (regno < REGPARM_MAX
5326 || (TARGET_MMX && MMX_REGNO_P (regno)
5327 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5328 || (TARGET_SSE && SSE_REGNO_P (regno)
5329 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5334 if (SSE_REGNO_P (regno) && TARGET_SSE)
5339 if (TARGET_SSE && SSE_REGNO_P (regno)
5340 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5344 /* TODO: The function should depend on current function ABI but
5345 builtins.c would need updating then. Therefore we use the default ABI. */
5348 /* RAX is used as hidden argument to va_arg functions. */
5349 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5352 if (ix86_abi == MS_ABI)
5353 parm_regs = x86_64_ms_abi_int_parameter_registers;
5355 parm_regs = x86_64_int_parameter_registers;
5356 for (i = 0; i < (ix86_abi == MS_ABI
5357 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5358 if (regno == parm_regs[i])
5363 /* Return true if we do not know how to pass TYPE solely in registers. */
5366 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5368 if (must_pass_in_stack_var_size_or_pad (mode, type))
5371 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5372 The layout_type routine is crafty and tries to trick us into passing
5373 currently unsupported vector types on the stack by using TImode. */
5374 return (!TARGET_64BIT && mode == TImode
5375 && type && TREE_CODE (type) != VECTOR_TYPE);
5378 /* Returns the size, in bytes, of the area reserved for arguments passed
5379 in registers for the function represented by fndecl, depending on the ABI used. */
5382 ix86_reg_parm_stack_space (const_tree fndecl)
5384 enum calling_abi call_abi = SYSV_ABI;
5385 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5386 call_abi = ix86_function_abi (fndecl);
5388 call_abi = ix86_function_type_abi (fndecl);
5389 if (TARGET_64BIT && call_abi == MS_ABI)
5394 /* Returns SYSV_ABI or MS_ABI depending on fntype, specifying the call ABI used. */
5397 ix86_function_type_abi (const_tree fntype)
5399 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5401 enum calling_abi abi = ix86_abi;
5402 if (abi == SYSV_ABI)
5404 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5407 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5415 ix86_function_ms_hook_prologue (const_tree fn)
5417 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5419 if (decl_function_context (fn) != NULL_TREE)
5420 error_at (DECL_SOURCE_LOCATION (fn),
5421 "ms_hook_prologue is not compatible with nested function");
5428 static enum calling_abi
5429 ix86_function_abi (const_tree fndecl)
5433 return ix86_function_type_abi (TREE_TYPE (fndecl));
5436 /* Returns SYSV_ABI or MS_ABI depending on cfun, specifying the call ABI used. */
5439 ix86_cfun_abi (void)
5443 return cfun->machine->call_abi;
5446 /* Write the extra assembler code needed to declare a function properly. */
5449 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5452 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5456 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5457 unsigned int filler_cc = 0xcccccccc;
5459 for (i = 0; i < filler_count; i += 4)
5460 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5463 #ifdef SUBTARGET_ASM_UNWIND_INIT
5464 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5467 ASM_OUTPUT_LABEL (asm_out_file, fname);
5469 /* Output magic byte marker, if hot-patch attribute is set. */
5474 /* leaq [%rsp + 0], %rsp */
5475 asm_fprintf (asm_out_file, ASM_BYTE
5476 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5480 /* movl.s %edi, %edi
5482 movl.s %esp, %ebp */
5483 asm_fprintf (asm_out_file, ASM_BYTE
5484 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5490 extern void init_regs (void);
5492 /* Implementation of the call ABI switching target hook. Sets the
5493 call register sets specific to FNDECL. See also
5494 ix86_conditional_register_usage for more details. */
5496 ix86_call_abi_override (const_tree fndecl)
5498 if (fndecl == NULL_TREE)
5499 cfun->machine->call_abi = ix86_abi;
5501 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5504 /* 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
5505 expensive re-initialization of init_regs each time we switch function context
5506 since this is needed only during RTL expansion. */
5508 ix86_maybe_switch_abi (void)
5511 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5515 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5516 for a call to a function whose data type is FNTYPE.
5517 For a library call, FNTYPE is 0. */
5520 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5521 tree fntype, /* tree ptr for function decl */
5522 rtx libname, /* SYMBOL_REF of library name or 0 */
5526 struct cgraph_local_info *i;
5529 memset (cum, 0, sizeof (*cum));
5531 /* Initialize for the current callee. */
5534 cfun->machine->callee_pass_avx256_p = false;
5535 cfun->machine->callee_return_avx256_p = false;
5540 i = cgraph_local_info (fndecl);
5541 cum->call_abi = ix86_function_abi (fndecl);
5542 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5547 cum->call_abi = ix86_function_type_abi (fntype);
5549 fnret_type = TREE_TYPE (fntype);
5554 if (TARGET_VZEROUPPER && fnret_type)
5556 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5558 if (function_pass_avx256_p (fnret_value))
5560 /* The return value of this function uses 256bit AVX modes. */
5562 cfun->machine->callee_return_avx256_p = true;
5564 cfun->machine->caller_return_avx256_p = true;
5568 cum->caller = caller;
5570 /* Set up the number of registers to use for passing arguments. */
5572 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5573 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5574 "or subtarget optimization implying it");
5575 cum->nregs = ix86_regparm;
5578 cum->nregs = (cum->call_abi == SYSV_ABI
5579 ? X86_64_REGPARM_MAX
5580 : X86_64_MS_REGPARM_MAX);
5584 cum->sse_nregs = SSE_REGPARM_MAX;
5587 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5588 ? X86_64_SSE_REGPARM_MAX
5589 : X86_64_MS_SSE_REGPARM_MAX);
5593 cum->mmx_nregs = MMX_REGPARM_MAX;
5594 cum->warn_avx = true;
5595 cum->warn_sse = true;
5596 cum->warn_mmx = true;
5598 /* Because types might mismatch between caller and callee, we need to
5599 use the actual type of the function for local calls.
5600 FIXME: cgraph_analyze can be told to record whether a function uses
5601 va_start, so for local functions maybe_vaarg can be made more aggressive.
5603 FIXME: once the type system is fixed, we won't need this code anymore. */
5604 if (i && i->local && i->can_change_signature)
5605 fntype = TREE_TYPE (fndecl);
5606 cum->maybe_vaarg = (fntype
5607 ? (!prototype_p (fntype) || stdarg_p (fntype))
5612 /* If there are variable arguments, then we won't pass anything
5613 in registers in 32-bit mode. */
5614 if (stdarg_p (fntype))
5625 /* Use the ecx and edx registers if the function has the fastcall
5626 attribute, else look for regparm information. */
5629 unsigned int ccvt = ix86_get_callcvt (fntype);
5630 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5633 cum->fastcall = 1; /* Same first register as in fastcall. */
5635 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5641 cum->nregs = ix86_function_regparm (fntype, fndecl);
5644 /* Set up the number of SSE registers used for passing SFmode
5645 and DFmode arguments. Warn for mismatching ABI. */
5646 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5650 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5651 But in the case of vector types, it is some vector mode.
5653 When we have only some of our vector isa extensions enabled, then there
5654 are some modes for which vector_mode_supported_p is false. For these
5655 modes, the generic vector support in gcc will choose some non-vector mode
5656 in order to implement the type. By computing the natural mode, we'll
5657 select the proper ABI location for the operand and not depend on whatever
5658 the middle-end decides to do with these vector types.
5660 The middle-end can't deal with vector types > 16 bytes. In this
5661 case, we return the original mode and warn about the ABI change if CUM isn't
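/* Illustrative example (assuming a generic-vector type): for

     typedef int v4si __attribute__ ((vector_size (16)));

   type_natural_mode returns V4SImode (possibly with an ABI warning)
   even when SSE is disabled and the middle-end would otherwise fall
   back to a non-vector mode, so the ABI location of such an argument
   does not depend on the enabled ISA.  */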
5664 static enum machine_mode
5665 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5667 enum machine_mode mode = TYPE_MODE (type);
5669 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5671 HOST_WIDE_INT size = int_size_in_bytes (type);
5672 if ((size == 8 || size == 16 || size == 32)
5673 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5674 && TYPE_VECTOR_SUBPARTS (type) > 1)
5676 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5678 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5679 mode = MIN_MODE_VECTOR_FLOAT;
5681 mode = MIN_MODE_VECTOR_INT;
5683 /* Get the mode which has this inner mode and number of units. */
5684 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5685 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5686 && GET_MODE_INNER (mode) == innermode)
5688 if (size == 32 && !TARGET_AVX)
5690 static bool warnedavx;
5697 warning (0, "AVX vector argument without AVX "
5698 "enabled changes the ABI");
5700 return TYPE_MODE (type);
5713 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5714 this may not agree with the mode that the type system has chosen for the
5715 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5716 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5719 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5724 if (orig_mode != BLKmode)
5725 tmp = gen_rtx_REG (orig_mode, regno);
5728 tmp = gen_rtx_REG (mode, regno);
5729 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5730 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5736 /* x86-64 register passing implementation. See the x86-64 ABI for details.
5737 The goal of this code is to classify each eightbyte of an incoming argument
5738 by register class and assign registers accordingly. */
5740 /* Return the union class of CLASS1 and CLASS2.
5741 See the x86-64 PS ABI for details. */
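/* Illustrative merges under the rules below (derived from the psABI):
   INTEGER + SSE -> INTEGER (rule #4), SSE + SSEUP -> SSE (rule #6),
   and anything + MEMORY -> MEMORY (rule #3).  */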
5743 static enum x86_64_reg_class
5744 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5746 /* Rule #1: If both classes are equal, this is the resulting class. */
5747 if (class1 == class2)
5750 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5752 if (class1 == X86_64_NO_CLASS)
5754 if (class2 == X86_64_NO_CLASS)
5757 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5758 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5759 return X86_64_MEMORY_CLASS;
5761 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5762 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5763 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5764 return X86_64_INTEGERSI_CLASS;
5765 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5766 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5767 return X86_64_INTEGER_CLASS;
5769 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5771 if (class1 == X86_64_X87_CLASS
5772 || class1 == X86_64_X87UP_CLASS
5773 || class1 == X86_64_COMPLEX_X87_CLASS
5774 || class2 == X86_64_X87_CLASS
5775 || class2 == X86_64_X87UP_CLASS
5776 || class2 == X86_64_COMPLEX_X87_CLASS)
5777 return X86_64_MEMORY_CLASS;
5779 /* Rule #6: Otherwise class SSE is used. */
5780 return X86_64_SSE_CLASS;
5783 /* Classify the argument of type TYPE and mode MODE.
5784 CLASSES will be filled by the register class used to pass each word
5785 of the operand. The number of words is returned. In case the parameter
5786 should be passed in memory, 0 is returned. As a special case for zero
5787 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5789 BIT_OFFSET is used internally for handling records; it gives the
5790 offset in bits modulo 256 to avoid overflow cases.
5792 See the x86-64 PS ABI for details.
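/* Illustrative example: for

     struct s { double d; int i; };

   classify_argument returns 2; the first eightbyte (the double) gets
   an SSE class and the second (the int) an integer class, so the
   struct travels in one SSE and one integer register.  */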
5796 classify_argument (enum machine_mode mode, const_tree type,
5797 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5799 HOST_WIDE_INT bytes =
5800 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5801 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5803 /* Variable sized entities are always passed/returned in memory. */
5807 if (mode != VOIDmode
5808 && targetm.calls.must_pass_in_stack (mode, type))
5811 if (type && AGGREGATE_TYPE_P (type))
5815 enum x86_64_reg_class subclasses[MAX_CLASSES];
5817 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5821 for (i = 0; i < words; i++)
5822 classes[i] = X86_64_NO_CLASS;
5824 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
5825 signal the memory class, so handle this as a special case. */
5828 classes[0] = X86_64_NO_CLASS;
5832 /* Classify each field of record and merge classes. */
5833 switch (TREE_CODE (type))
5836 /* And now merge the fields of structure. */
5837 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5839 if (TREE_CODE (field) == FIELD_DECL)
5843 if (TREE_TYPE (field) == error_mark_node)
5846 /* Bitfields are always classified as integer. Handle them
5847 early, since later code would consider them to be
5848 misaligned integers. */
5849 if (DECL_BIT_FIELD (field))
5851 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5852 i < ((int_bit_position (field) + (bit_offset % 64))
5853 + tree_low_cst (DECL_SIZE (field), 0)
5856 merge_classes (X86_64_INTEGER_CLASS,
5863 type = TREE_TYPE (field);
5865 /* Flexible array member is ignored. */
5866 if (TYPE_MODE (type) == BLKmode
5867 && TREE_CODE (type) == ARRAY_TYPE
5868 && TYPE_SIZE (type) == NULL_TREE
5869 && TYPE_DOMAIN (type) != NULL_TREE
5870 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5875 if (!warned && warn_psabi)
5878 inform (input_location,
5879 "the ABI of passing struct with"
5880 " a flexible array member has"
5881 " changed in GCC 4.4");
5885 num = classify_argument (TYPE_MODE (type), type,
5887 (int_bit_position (field)
5888 + bit_offset) % 256);
5891 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5892 for (i = 0; i < num && (i + pos) < words; i++)
5894 merge_classes (subclasses[i], classes[i + pos]);
5901 /* Arrays are handled as small records. */
5904 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5905 TREE_TYPE (type), subclasses, bit_offset);
5909 /* The partial classes are now full classes. */
5910 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5911 subclasses[0] = X86_64_SSE_CLASS;
5912 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5913 && !((bit_offset % 64) == 0 && bytes == 4))
5914 subclasses[0] = X86_64_INTEGER_CLASS;
5916 for (i = 0; i < words; i++)
5917 classes[i] = subclasses[i % num];
5922 case QUAL_UNION_TYPE:
5923 /* Unions are similar to RECORD_TYPE but the offset is always 0.
5925 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5927 if (TREE_CODE (field) == FIELD_DECL)
5931 if (TREE_TYPE (field) == error_mark_node)
5934 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5935 TREE_TYPE (field), subclasses,
5939 for (i = 0; i < num; i++)
5940 classes[i] = merge_classes (subclasses[i], classes[i]);
5951 /* When the size is > 16 bytes, if the first eightbyte isn't
5952 X86_64_SSE_CLASS or any of the others isn't
5953 X86_64_SSEUP_CLASS, everything should be passed in
5955 if (classes[0] != X86_64_SSE_CLASS)
5958 for (i = 1; i < words; i++)
5959 if (classes[i] != X86_64_SSEUP_CLASS)
5963 /* Final merger cleanup. */
5964 for (i = 0; i < words; i++)
5966 /* If one class is MEMORY, everything should be passed in
5968 if (classes[i] == X86_64_MEMORY_CLASS)
5971 /* X86_64_SSEUP_CLASS should always be preceded by
5972 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5973 if (classes[i] == X86_64_SSEUP_CLASS
5974 && classes[i - 1] != X86_64_SSE_CLASS
5975 && classes[i - 1] != X86_64_SSEUP_CLASS)
5977 /* The first one should never be X86_64_SSEUP_CLASS. */
5978 gcc_assert (i != 0);
5979 classes[i] = X86_64_SSE_CLASS;
5982 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5983 everything should be passed in memory. */
5984 if (classes[i] == X86_64_X87UP_CLASS
5985 && (classes[i - 1] != X86_64_X87_CLASS))
5989 /* The first one should never be X86_64_X87UP_CLASS. */
5990 gcc_assert (i != 0);
5991 if (!warned && warn_psabi)
5994 inform (input_location,
5995 "the ABI of passing union with long double"
5996 " has changed in GCC 4.4");
6004 /* Compute the alignment needed. We align all types to their natural
6005 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6006 if (mode != VOIDmode && mode != BLKmode)
6008 int mode_alignment = GET_MODE_BITSIZE (mode);
6011 mode_alignment = 128;
6012 else if (mode == XCmode)
6013 mode_alignment = 256;
6014 if (COMPLEX_MODE_P (mode))
6015 mode_alignment /= 2;
6016 /* Misaligned fields are always returned in memory. */
6017 if (bit_offset % mode_alignment)
6021 /* For V1xx modes, just use the base mode. */
6022 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6023 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6024 mode = GET_MODE_INNER (mode);
6026 /* Classification of atomic types. */
6031 classes[0] = X86_64_SSE_CLASS;
6034 classes[0] = X86_64_SSE_CLASS;
6035 classes[1] = X86_64_SSEUP_CLASS;
6045 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6049 classes[0] = X86_64_INTEGERSI_CLASS;
6052 else if (size <= 64)
6054 classes[0] = X86_64_INTEGER_CLASS;
6057 else if (size <= 64+32)
6059 classes[0] = X86_64_INTEGER_CLASS;
6060 classes[1] = X86_64_INTEGERSI_CLASS;
6063 else if (size <= 64+64)
6065 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6073 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6077 /* OImode shouldn't be used directly. */
6082 if (!(bit_offset % 64))
6083 classes[0] = X86_64_SSESF_CLASS;
6085 classes[0] = X86_64_SSE_CLASS;
6088 classes[0] = X86_64_SSEDF_CLASS;
6091 classes[0] = X86_64_X87_CLASS;
6092 classes[1] = X86_64_X87UP_CLASS;
6095 classes[0] = X86_64_SSE_CLASS;
6096 classes[1] = X86_64_SSEUP_CLASS;
6099 classes[0] = X86_64_SSE_CLASS;
6100 if (!(bit_offset % 64))
6106 if (!warned && warn_psabi)
6109 inform (input_location,
6110 "the ABI of passing structure with complex float"
6111 " member has changed in GCC 4.4");
6113 classes[1] = X86_64_SSESF_CLASS;
6117 classes[0] = X86_64_SSEDF_CLASS;
6118 classes[1] = X86_64_SSEDF_CLASS;
6121 classes[0] = X86_64_COMPLEX_X87_CLASS;
6124 /* These modes are larger than 16 bytes. */
6132 classes[0] = X86_64_SSE_CLASS;
6133 classes[1] = X86_64_SSEUP_CLASS;
6134 classes[2] = X86_64_SSEUP_CLASS;
6135 classes[3] = X86_64_SSEUP_CLASS;
6143 classes[0] = X86_64_SSE_CLASS;
6144 classes[1] = X86_64_SSEUP_CLASS;
6152 classes[0] = X86_64_SSE_CLASS;
6158 gcc_assert (VECTOR_MODE_P (mode));
6163 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6165 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6166 classes[0] = X86_64_INTEGERSI_CLASS;
6168 classes[0] = X86_64_INTEGER_CLASS;
6169 classes[1] = X86_64_INTEGER_CLASS;
6170 return 1 + (bytes > 8);
6174 /* Examine the argument and set the number of registers required in each
6175 class. Return 0 iff the parameter should be passed in memory. */
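/* Illustrative example: for the 16-byte struct { double; int; } above,
   examine_argument sets *int_nregs = 1 and *sse_nregs = 1 and returns
   nonzero; for a 40-byte struct, classification fails and 0 is
   returned, i.e. the parameter lives in memory.  */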
6177 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6178 int *int_nregs, int *sse_nregs)
6180 enum x86_64_reg_class regclass[MAX_CLASSES];
6181 int n = classify_argument (mode, type, regclass, 0);
6187 for (n--; n >= 0; n--)
6188 switch (regclass[n])
6190 case X86_64_INTEGER_CLASS:
6191 case X86_64_INTEGERSI_CLASS:
6194 case X86_64_SSE_CLASS:
6195 case X86_64_SSESF_CLASS:
6196 case X86_64_SSEDF_CLASS:
6199 case X86_64_NO_CLASS:
6200 case X86_64_SSEUP_CLASS:
6202 case X86_64_X87_CLASS:
6203 case X86_64_X87UP_CLASS:
6207 case X86_64_COMPLEX_X87_CLASS:
6208 return in_return ? 2 : 0;
6209 case X86_64_MEMORY_CLASS:
6215 /* Construct container for the argument used by GCC interface. See
6216 FUNCTION_ARG for the detailed description. */
6219 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6220 const_tree type, int in_return, int nintregs, int nsseregs,
6221 const int *intreg, int sse_regno)
6223 /* The following variables hold the static issued_error state. */
6224 static bool issued_sse_arg_error;
6225 static bool issued_sse_ret_error;
6226 static bool issued_x87_ret_error;
6228 enum machine_mode tmpmode;
6230 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6231 enum x86_64_reg_class regclass[MAX_CLASSES];
6235 int needed_sseregs, needed_intregs;
6236 rtx exp[MAX_CLASSES];
6239 n = classify_argument (mode, type, regclass, 0);
6242 if (!examine_argument (mode, type, in_return, &needed_intregs,
6245 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6248 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6249 some less clueful developer tries to use floating-point anyway. */
6250 if (needed_sseregs && !TARGET_SSE)
6254 if (!issued_sse_ret_error)
6256 error ("SSE register return with SSE disabled");
6257 issued_sse_ret_error = true;
6260 else if (!issued_sse_arg_error)
6262 error ("SSE register argument with SSE disabled");
6263 issued_sse_arg_error = true;
6268 /* Likewise, error if the ABI requires us to return values in the
6269 x87 registers and the user specified -mno-80387. */
6270 if (!TARGET_80387 && in_return)
6271 for (i = 0; i < n; i++)
6272 if (regclass[i] == X86_64_X87_CLASS
6273 || regclass[i] == X86_64_X87UP_CLASS
6274 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6276 if (!issued_x87_ret_error)
6278 error ("x87 register return with x87 disabled");
6279 issued_x87_ret_error = true;
6284 /* First construct the simple cases. Avoid SCmode, since we want to use
6285 a single register to pass this type. */
6286 if (n == 1 && mode != SCmode)
6287 switch (regclass[0])
6289 case X86_64_INTEGER_CLASS:
6290 case X86_64_INTEGERSI_CLASS:
6291 return gen_rtx_REG (mode, intreg[0]);
6292 case X86_64_SSE_CLASS:
6293 case X86_64_SSESF_CLASS:
6294 case X86_64_SSEDF_CLASS:
6295 if (mode != BLKmode)
6296 return gen_reg_or_parallel (mode, orig_mode,
6297 SSE_REGNO (sse_regno));
6299 case X86_64_X87_CLASS:
6300 case X86_64_COMPLEX_X87_CLASS:
6301 return gen_rtx_REG (mode, FIRST_STACK_REG);
6302 case X86_64_NO_CLASS:
6303 /* Zero-sized array, struct or class. */
6308 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6309 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6310 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6312 && regclass[0] == X86_64_SSE_CLASS
6313 && regclass[1] == X86_64_SSEUP_CLASS
6314 && regclass[2] == X86_64_SSEUP_CLASS
6315 && regclass[3] == X86_64_SSEUP_CLASS
6317 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6320 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6321 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6322 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6323 && regclass[1] == X86_64_INTEGER_CLASS
6324 && (mode == CDImode || mode == TImode || mode == TFmode)
6325 && intreg[0] + 1 == intreg[1])
6326 return gen_rtx_REG (mode, intreg[0]);
6328 /* Otherwise figure out the entries of the PARALLEL. */
6329 for (i = 0; i < n; i++)
6333 switch (regclass[i])
6335 case X86_64_NO_CLASS:
6337 case X86_64_INTEGER_CLASS:
6338 case X86_64_INTEGERSI_CLASS:
6339 /* Merge TImodes on aligned occasions here too. */
6340 if (i * 8 + 8 > bytes)
6341 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6342 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6346 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
6347 if (tmpmode == BLKmode)
6349 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6350 gen_rtx_REG (tmpmode, *intreg),
6354 case X86_64_SSESF_CLASS:
6355 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6356 gen_rtx_REG (SFmode,
6357 SSE_REGNO (sse_regno)),
6361 case X86_64_SSEDF_CLASS:
6362 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6363 gen_rtx_REG (DFmode,
6364 SSE_REGNO (sse_regno)),
6368 case X86_64_SSE_CLASS:
6376 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6386 && regclass[1] == X86_64_SSEUP_CLASS
6387 && regclass[2] == X86_64_SSEUP_CLASS
6388 && regclass[3] == X86_64_SSEUP_CLASS);
6395 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6396 gen_rtx_REG (tmpmode,
6397 SSE_REGNO (sse_regno)),
6406 /* Empty aligned struct, union or class. */
6410 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6411 for (i = 0; i < nexps; i++)
6412 XVECEXP (ret, 0, i) = exp [i];
6416 /* Update the data in CUM to advance over an argument of mode MODE
6417 and data type TYPE. (TYPE is null for libcalls where that information
6418 may not be available.) */
6421 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6422 const_tree type, HOST_WIDE_INT bytes,
6423 HOST_WIDE_INT words)
6439 cum->words += words;
6440 cum->nregs -= words;
6441 cum->regno += words;
6443 if (cum->nregs <= 0)
6451 /* OImode shouldn't be used directly. */
6455 if (cum->float_in_sse < 2)
6458 if (cum->float_in_sse < 1)
6475 if (!type || !AGGREGATE_TYPE_P (type))
6477 cum->sse_words += words;
6478 cum->sse_nregs -= 1;
6479 cum->sse_regno += 1;
6480 if (cum->sse_nregs <= 0)
6494 if (!type || !AGGREGATE_TYPE_P (type))
6496 cum->mmx_words += words;
6497 cum->mmx_nregs -= 1;
6498 cum->mmx_regno += 1;
6499 if (cum->mmx_nregs <= 0)
6510 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6511 const_tree type, HOST_WIDE_INT words, bool named)
6513 int int_nregs, sse_nregs;
6515 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6516 if (!named && VALID_AVX256_REG_MODE (mode))
6519 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6520 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6522 cum->nregs -= int_nregs;
6523 cum->sse_nregs -= sse_nregs;
6524 cum->regno += int_nregs;
6525 cum->sse_regno += sse_nregs;
6529 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6530 cum->words = (cum->words + align - 1) & ~(align - 1);
6531 cum->words += words;
6536 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6537 HOST_WIDE_INT words)
6539 /* Otherwise, this should be passed indirectly. */
6540 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6542 cum->words += words;
6550 /* Update the data in CUM to advance over an argument of mode MODE and
6551 data type TYPE. (TYPE is null for libcalls where that information
6552 may not be available.) */
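/* Illustrative example (64-bit SysV ABI): after advancing over the
   arguments (int, double), cum->regno is 1 and cum->sse_regno is 1,
   so a following integer argument would go in %rsi and a following
   double in %xmm1.  */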
6555 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6556 const_tree type, bool named)
6558 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6559 HOST_WIDE_INT bytes, words;
6561 if (mode == BLKmode)
6562 bytes = int_size_in_bytes (type);
6564 bytes = GET_MODE_SIZE (mode);
6565 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6568 mode = type_natural_mode (type, NULL);
6570 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6571 function_arg_advance_ms_64 (cum, bytes, words);
6572 else if (TARGET_64BIT)
6573 function_arg_advance_64 (cum, mode, type, words, named);
6575 function_arg_advance_32 (cum, mode, type, bytes, words);
6578 /* Define where to put the arguments to a function.
6579 Value is zero to push the argument on the stack,
6580 or a hard register in which to store the argument.
6582 MODE is the argument's machine mode.
6583 TYPE is the data type of the argument (as a tree).
6584 This is null for libcalls where that information may
6586 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6587 the preceding args and about the function being called.
6588 NAMED is nonzero if this argument is a named parameter
6589 (otherwise it is an extra parameter matching an ellipsis). */
6592 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6593 enum machine_mode orig_mode, const_tree type,
6594 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6596 static bool warnedsse, warnedmmx;
6598 /* Avoid the AL settings for the Unix64 ABI. */
6599 if (mode == VOIDmode)
6615 if (words <= cum->nregs)
6617 int regno = cum->regno;
6619 /* Fastcall allocates the first two DWORD (SImode) or
6620 smaller arguments to ECX and EDX if it isn't an
6626 || (type && AGGREGATE_TYPE_P (type)))
6629 /* ECX, not EAX, is the first allocated register. */
6630 if (regno == AX_REG)
6633 return gen_rtx_REG (mode, regno);
6638 if (cum->float_in_sse < 2)
6641 if (cum->float_in_sse < 1)
6645 /* In 32bit, we pass TImode in xmm registers. */
6652 if (!type || !AGGREGATE_TYPE_P (type))
6654 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6657 warning (0, "SSE vector argument without SSE enabled "
6661 return gen_reg_or_parallel (mode, orig_mode,
6662 cum->sse_regno + FIRST_SSE_REG);
6667 /* OImode shouldn't be used directly. */
6676 if (!type || !AGGREGATE_TYPE_P (type))
6679 return gen_reg_or_parallel (mode, orig_mode,
6680 cum->sse_regno + FIRST_SSE_REG);
6690 if (!type || !AGGREGATE_TYPE_P (type))
6692 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6695 warning (0, "MMX vector argument without MMX enabled "
6699 return gen_reg_or_parallel (mode, orig_mode,
6700 cum->mmx_regno + FIRST_MMX_REG);
6709 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6710 enum machine_mode orig_mode, const_tree type, bool named)
6712 /* Handle a hidden AL argument containing the number of registers
6713 for varargs x86-64 functions. */
6714 if (mode == VOIDmode)
6715 return GEN_INT (cum->maybe_vaarg
6716 ? (cum->sse_nregs < 0
6717 ? X86_64_SSE_REGPARM_MAX
6732 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6738 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6740 &x86_64_int_parameter_registers [cum->regno],
6745 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6746 enum machine_mode orig_mode, bool named,
6747 HOST_WIDE_INT bytes)
6751 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6752 We use the value -2 to specify that the current function call is MSABI. */
6753 if (mode == VOIDmode)
6754 return GEN_INT (-2);
6756 /* If we've run out of registers, it goes on the stack. */
6757 if (cum->nregs == 0)
6760 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6762 /* Only floating point modes are passed in anything but integer regs. */
6763 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6766 regno = cum->regno + FIRST_SSE_REG;
6771 /* Unnamed floating parameters are passed in both the
6772 SSE and integer registers. */
6773 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6774 t2 = gen_rtx_REG (mode, regno);
6775 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6776 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6777 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6780 /* Handle aggregate types passed in registers. */
6781 if (orig_mode == BLKmode)
6783 if (bytes > 0 && bytes <= 8)
6784 mode = (bytes > 4 ? DImode : SImode);
6785 if (mode == BLKmode)
6789 return gen_reg_or_parallel (mode, orig_mode, regno);
6792 /* Return where to put the arguments to a function.
6793 Value is zero to push the argument on the stack, or a hard register in which to store the argument.
6795 MODE is the argument's machine mode. TYPE is the data type of the
6796 argument. It is null for libcalls where that information may not be
6797 available. CUM gives information about the preceding args and about
6798 the function being called. NAMED is nonzero if this argument is a
6799 named parameter (otherwise it is an extra parameter matching an
6803 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6804 const_tree type, bool named)
6806 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6807 enum machine_mode mode = omode;
6808 HOST_WIDE_INT bytes, words;
6811 if (mode == BLKmode)
6812 bytes = int_size_in_bytes (type);
6814 bytes = GET_MODE_SIZE (mode);
6815 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6817 /* To simplify the code below, represent vector types with a vector mode
6818 even if MMX/SSE are not active. */
6819 if (type && TREE_CODE (type) == VECTOR_TYPE)
6820 mode = type_natural_mode (type, cum);
6822 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6823 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6824 else if (TARGET_64BIT)
6825 arg = function_arg_64 (cum, mode, omode, type, named);
6827 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6829 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6831 /* This argument uses 256bit AVX modes. */
6833 cfun->machine->callee_pass_avx256_p = true;
6835 cfun->machine->caller_pass_avx256_p = true;
6841 /* A C expression that indicates when an argument must be passed by
6842 reference. If nonzero for an argument, a copy of that argument is
6843 made in memory and a pointer to the argument is passed instead of
6844 the argument itself. The pointer is passed in whatever way is
6845 appropriate for passing a pointer to that type. */
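/* Illustrative example (Windows x64 convention, see below): a 12-byte

     struct s { int a, b, c; };

   is not 1, 2, 4 or 8 bytes wide, so under MS_ABI it is passed by
   reference, whereas an 8-byte struct is passed by value.  */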
6848 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6849 enum machine_mode mode ATTRIBUTE_UNUSED,
6850 const_tree type, bool named ATTRIBUTE_UNUSED)
6852 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6854 /* See Windows x64 Software Convention. */
6855 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6857 int msize = (int) GET_MODE_SIZE (mode);
6860 /* Arrays are passed by reference. */
6861 if (TREE_CODE (type) == ARRAY_TYPE)
6864 if (AGGREGATE_TYPE_P (type))
6866 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6867 are passed by reference. */
6868 msize = int_size_in_bytes (type);
6872 /* __m128 is passed by reference. */
6874 case 1: case 2: case 4: case 8:
6880 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6886 /* Return true when TYPE should be 128bit aligned for the 32bit argument
6887 passing ABI. XXX: This function is obsolete and is only used for
6888 checking psABI compatibility with previous versions of GCC. */
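/* Illustrative example: an SSE argument such as __m128 has TYPE_ALIGN
   of 128 and is considered 128-bit aligned here, while a plain double
   (64-bit alignment) is not.  */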
6891 ix86_compat_aligned_value_p (const_tree type)
6893 enum machine_mode mode = TYPE_MODE (type);
6894 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6898 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6900 if (TYPE_ALIGN (type) < 128)
6903 if (AGGREGATE_TYPE_P (type))
6905 /* Walk the aggregates recursively. */
6906 switch (TREE_CODE (type))
6910 case QUAL_UNION_TYPE:
6914 /* Walk all the structure fields. */
6915 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6917 if (TREE_CODE (field) == FIELD_DECL
6918 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6925 /* Just in case some language passes arrays by value. */
6926 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6937 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6938 XXX: This function is obsolete and is only used for checking psABI
6939 compatibility with previous versions of GCC. */
6942 ix86_compat_function_arg_boundary (enum machine_mode mode,
6943 const_tree type, unsigned int align)
6945 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6946 natural boundaries. */
6947 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6949 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6950 make an exception for SSE modes since these require 128bit
6953 The handling here differs from field_alignment. ICC aligns MMX
6954 arguments to 4 byte boundaries, while structure fields are aligned
6955 to 8 byte boundaries. */
6958 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6959 align = PARM_BOUNDARY;
6963 if (!ix86_compat_aligned_value_p (type))
6964 align = PARM_BOUNDARY;
6967 if (align > BIGGEST_ALIGNMENT)
6968 align = BIGGEST_ALIGNMENT;
6972 /* Return true when TYPE should be 128bit aligned for the 32bit argument
6976 ix86_contains_aligned_value_p (const_tree type)
6978 enum machine_mode mode = TYPE_MODE (type);
6980 if (mode == XFmode || mode == XCmode)
6983 if (TYPE_ALIGN (type) < 128)
6986 if (AGGREGATE_TYPE_P (type))
6988 /* Walk the aggregates recursively. */
6989 switch (TREE_CODE (type))
6993 case QUAL_UNION_TYPE:
6997 /* Walk all the structure fields. */
6998 for (field = TYPE_FIELDS (type);
7000 field = DECL_CHAIN (field))
7002 if (TREE_CODE (field) == FIELD_DECL
7003 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7010 /* Just in case some language passes arrays by value. */
7011 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7020 return TYPE_ALIGN (type) >= 128;
7025 /* Gives the alignment boundary, in bits, of an argument with the
7026 specified mode and type. */
7029 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7034 /* Since the main variant type is used for the call, convert the
7035 type to its main variant. */
7036 type = TYPE_MAIN_VARIANT (type);
7037 align = TYPE_ALIGN (type);
7040 align = GET_MODE_ALIGNMENT (mode);
7041 if (align < PARM_BOUNDARY)
7042 align = PARM_BOUNDARY;
7046 unsigned int saved_align = align;
7050 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7053 if (mode == XFmode || mode == XCmode)
7054 align = PARM_BOUNDARY;
7056 else if (!ix86_contains_aligned_value_p (type))
7057 align = PARM_BOUNDARY;
7060 align = PARM_BOUNDARY;
7065 && align != ix86_compat_function_arg_boundary (mode, type,
7069 inform (input_location,
7070 "The ABI for passing parameters with %d-byte"
7071 " alignment has changed in GCC 4.6",
7072 align / BITS_PER_UNIT);
7079 /* Return true if N is a possible register number of function value. */
7082 ix86_function_value_regno_p (const unsigned int regno)
7089 case FIRST_FLOAT_REG:
7090 /* TODO: The function should depend on the current function's ABI, but
7091 builtins.c would then need updating. Therefore we use the
7093 if (TARGET_64BIT && ix86_abi == MS_ABI)
7095 return TARGET_FLOAT_RETURNS_IN_80387;
7101 if (TARGET_MACHO || TARGET_64BIT)
7109 /* Define how to find the value returned by a function.
7110 VALTYPE is the data type of the value (as a tree).
7111 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7112 otherwise, FUNC is 0. */
7115 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7116 const_tree fntype, const_tree fn)
7120 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7121 we normally prevent this case when mmx is not available. However
7122 some ABIs may require the result to be returned like DImode. */
7123 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7124 regno = FIRST_MMX_REG;
7126 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7127 we prevent this case when sse is not available. However some ABIs
7128 may require the result to be returned like integer TImode. */
7129 else if (mode == TImode
7130 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7131 regno = FIRST_SSE_REG;
7133 /* 32-byte vector modes in %ymm0. */
7134 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7135 regno = FIRST_SSE_REG;
7137 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7138 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7139 regno = FIRST_FLOAT_REG;
7141 /* Most things go in %eax. */
7144 /* Override FP return register with %xmm0 for local functions when
7145 SSE math is enabled or for functions with sseregparm attribute. */
7146 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7148 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7149 if ((sse_level >= 1 && mode == SFmode)
7150 || (sse_level == 2 && mode == DFmode))
7151 regno = FIRST_SSE_REG;
7154 /* OImode shouldn't be used directly. */
7155 gcc_assert (mode != OImode);
7157 return gen_rtx_REG (orig_mode, regno);
7161 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7166 /* Handle libcalls, which don't provide a type node. */
7167 if (valtype == NULL)
7181 regno = FIRST_SSE_REG;
7185 regno = FIRST_FLOAT_REG;
7193 return gen_rtx_REG (mode, regno);
7195 else if (POINTER_TYPE_P (valtype))
7197 /* Pointers are always returned in Pmode. */
7201 ret = construct_container (mode, orig_mode, valtype, 1,
7202 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7203 x86_64_int_return_registers, 0);
7205 /* For zero-sized structures, construct_container returns NULL, but we
7206 need to keep the rest of the compiler happy by returning a meaningful value. */
7208 ret = gen_rtx_REG (orig_mode, AX_REG);
7214 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7216 unsigned int regno = AX_REG;
7220 switch (GET_MODE_SIZE (mode))
7223 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7224 && !COMPLEX_MODE_P (mode))
7225 regno = FIRST_SSE_REG;
7229 if (mode == SFmode || mode == DFmode)
7230 regno = FIRST_SSE_REG;
7236 return gen_rtx_REG (orig_mode, regno);
7240 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7241 enum machine_mode orig_mode, enum machine_mode mode)
7243 const_tree fn, fntype;
7246 if (fntype_or_decl && DECL_P (fntype_or_decl))
7247 fn = fntype_or_decl;
7248 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7250 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7251 return function_value_ms_64 (orig_mode, mode);
7252 else if (TARGET_64BIT)
7253 return function_value_64 (orig_mode, mode, valtype);
7255 return function_value_32 (orig_mode, mode, fntype, fn);
7259 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7260 bool outgoing ATTRIBUTE_UNUSED)
7262 enum machine_mode mode, orig_mode;
7264 orig_mode = TYPE_MODE (valtype);
7265 mode = type_natural_mode (valtype, NULL);
7266 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7269 /* Pointer function arguments and return values are promoted to Pmode. */
7271 static enum machine_mode
7272 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7273 int *punsignedp, const_tree fntype,
7276 if (type != NULL_TREE && POINTER_TYPE_P (type))
7278 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7281 return default_promote_function_mode (type, mode, punsignedp, fntype,
7286 ix86_libcall_value (enum machine_mode mode)
7288 return ix86_function_value_1 (NULL, NULL, mode, mode);
7291 /* Return true iff type is returned in memory. */
7293 static bool ATTRIBUTE_UNUSED
7294 return_in_memory_32 (const_tree type, enum machine_mode mode)
7298 if (mode == BLKmode)
7301 size = int_size_in_bytes (type);
7303 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7306 if (VECTOR_MODE_P (mode) || mode == TImode)
7308 /* User-created vectors small enough to fit in EAX. */
7312 /* MMX/3dNow values are returned in MM0,
7313 except when it doesn't exist or the ABI prescribes otherwise. */
7315 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7317 /* SSE values are returned in XMM0, except when it doesn't exist. */
7321 /* AVX values are returned in YMM0, except when it doesn't exist. */
7332 /* OImode shouldn't be used directly. */
7333 gcc_assert (mode != OImode);
7338 static bool ATTRIBUTE_UNUSED
7339 return_in_memory_64 (const_tree type, enum machine_mode mode)
7341 int needed_intregs, needed_sseregs;
7342 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7345 static bool ATTRIBUTE_UNUSED
7346 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7348 HOST_WIDE_INT size = int_size_in_bytes (type);
7350 /* __m128 is returned in xmm0. */
7351 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7352 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7355 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
7356 return size != 1 && size != 2 && size != 4 && size != 8;
7360 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7362 #ifdef SUBTARGET_RETURN_IN_MEMORY
7363 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7365 const enum machine_mode mode = type_natural_mode (type, NULL);
7369 if (ix86_function_type_abi (fntype) == MS_ABI)
7370 return return_in_memory_ms_64 (type, mode);
7372 return return_in_memory_64 (type, mode);
7375 return return_in_memory_32 (type, mode);
7379 /* When returning SSE vector types, we have a choice of either
7380 (1) being abi incompatible with a -march switch, or
7381 (2) generating an error.
7382 Given no good solution, I think the safest thing is one warning.
7383 The user won't be able to use -Werror, but....
7385 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7386 called in response to actually generating a caller or callee that
7387 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7388 via aggregate_value_p for general type probing from tree-ssa. */
7391 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7393 static bool warnedsse, warnedmmx;
7395 if (!TARGET_64BIT && type)
7397 /* Look at the return type of the function, not the function type. */
7398 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7400 if (!TARGET_SSE && !warnedsse)
7403 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7406 warning (0, "SSE vector return without SSE enabled "
7411 if (!TARGET_MMX && !warnedmmx)
7413 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7416 warning (0, "MMX vector return without MMX enabled "
7426 /* Create the va_list data type. */
7428 /* Returns the calling convention specific va_list data type.
7429 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
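/* For reference, the record built below corresponds to the
   user-visible SysV x86-64 layout:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag[1];  */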
7432 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7434 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7436 /* For i386 we use a plain pointer to the argument area. */
7437 if (!TARGET_64BIT || abi == MS_ABI)
7438 return build_pointer_type (char_type_node);
7440 record = lang_hooks.types.make_type (RECORD_TYPE);
7441 type_decl = build_decl (BUILTINS_LOCATION,
7442 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7444 f_gpr = build_decl (BUILTINS_LOCATION,
7445 FIELD_DECL, get_identifier ("gp_offset"),
7446 unsigned_type_node);
7447 f_fpr = build_decl (BUILTINS_LOCATION,
7448 FIELD_DECL, get_identifier ("fp_offset"),
7449 unsigned_type_node);
7450 f_ovf = build_decl (BUILTINS_LOCATION,
7451 FIELD_DECL, get_identifier ("overflow_arg_area"),
7453 f_sav = build_decl (BUILTINS_LOCATION,
7454 FIELD_DECL, get_identifier ("reg_save_area"),
7457 va_list_gpr_counter_field = f_gpr;
7458 va_list_fpr_counter_field = f_fpr;
7460 DECL_FIELD_CONTEXT (f_gpr) = record;
7461 DECL_FIELD_CONTEXT (f_fpr) = record;
7462 DECL_FIELD_CONTEXT (f_ovf) = record;
7463 DECL_FIELD_CONTEXT (f_sav) = record;
7465 TYPE_STUB_DECL (record) = type_decl;
7466 TYPE_NAME (record) = type_decl;
7467 TYPE_FIELDS (record) = f_gpr;
7468 DECL_CHAIN (f_gpr) = f_fpr;
7469 DECL_CHAIN (f_fpr) = f_ovf;
7470 DECL_CHAIN (f_ovf) = f_sav;
7472 layout_type (record);
7474 /* The correct type is an array type of one element. */
7475 return build_array_type (record, build_index_type (size_zero_node));
7478 /* Setup the builtin va_list data type and for 64-bit the additional
7479 calling convention specific va_list data types. */
7482 ix86_build_builtin_va_list (void)
7484 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7486 /* Initialize the ABI-specific va_list builtin types. */
7490 if (ix86_abi == MS_ABI)
7492 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7493 if (TREE_CODE (t) != RECORD_TYPE)
7494 t = build_variant_type_copy (t);
7495 sysv_va_list_type_node = t;
7500 if (TREE_CODE (t) != RECORD_TYPE)
7501 t = build_variant_type_copy (t);
7502 sysv_va_list_type_node = t;
7504 if (ix86_abi != MS_ABI)
7506 t = ix86_build_builtin_va_list_abi (MS_ABI);
7507 if (TREE_CODE (t) != RECORD_TYPE)
7508 t = build_variant_type_copy (t);
7509 ms_va_list_type_node = t;
7514 if (TREE_CODE (t) != RECORD_TYPE)
7515 t = build_variant_type_copy (t);
7516 ms_va_list_type_node = t;
7523 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
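/* For reference, the varargs save area laid out below places the
   integer registers first (X86_64_REGPARM_MAX words: %rdi, %rsi,
   %rdx, %rcx, %r8, %r9), followed by the SSE registers
   (X86_64_SSE_REGPARM_MAX slots of 16 bytes each).  */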
7526 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7532 /* GPR size of varargs save area. */
7533 if (cfun->va_list_gpr_size)
7534 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7536 ix86_varargs_gpr_size = 0;
7538 /* FPR size of varargs save area. We don't need it if we don't pass
7539 anything in SSE registers. */
7540 if (TARGET_SSE && cfun->va_list_fpr_size)
7541 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7543 ix86_varargs_fpr_size = 0;
7545 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7548 save_area = frame_pointer_rtx;
7549 set = get_varargs_alias_set ();
7551 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7552 if (max > X86_64_REGPARM_MAX)
7553 max = X86_64_REGPARM_MAX;
7555 for (i = cum->regno; i < max; i++)
7557 mem = gen_rtx_MEM (Pmode,
7558 plus_constant (save_area, i * UNITS_PER_WORD));
7559 MEM_NOTRAP_P (mem) = 1;
7560 set_mem_alias_set (mem, set);
7561 emit_move_insn (mem, gen_rtx_REG (Pmode,
7562 x86_64_int_parameter_registers[i]));
7565 if (ix86_varargs_fpr_size)
7567 enum machine_mode smode;
7570 /* Now emit code to save SSE registers. The AX parameter contains the
7571 number of SSE parameter registers used to call this function, though all
7572 we actually check here is the zero/non-zero status. */
7574 label = gen_label_rtx ();
7575 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7576 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7579 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7580 we used movdqa (i.e. TImode) instead? Perhaps even better would
7581 be if we could determine the real mode of the data, via a hook
7582 into pass_stdarg. Ignore all that for now. */
7584 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7585 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7587 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7588 if (max > X86_64_SSE_REGPARM_MAX)
7589 max = X86_64_SSE_REGPARM_MAX;
7591 for (i = cum->sse_regno; i < max; ++i)
7593 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7594 mem = gen_rtx_MEM (smode, mem);
7595 MEM_NOTRAP_P (mem) = 1;
7596 set_mem_alias_set (mem, set);
7597 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7599 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7607 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7609 alias_set_type set = get_varargs_alias_set ();
7612 /* Reset to zero, as there might be a sysv vaarg used
7614 ix86_varargs_gpr_size = 0;
7615 ix86_varargs_fpr_size = 0;
7617 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7621 mem = gen_rtx_MEM (Pmode,
7622 plus_constant (virtual_incoming_args_rtx,
7623 i * UNITS_PER_WORD));
7624 MEM_NOTRAP_P (mem) = 1;
7625 set_mem_alias_set (mem, set);
7627 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7628 emit_move_insn (mem, reg);
7633 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7634 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7637 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7638 CUMULATIVE_ARGS next_cum;
7641 /* This argument doesn't appear to be used anymore. Which is good,
7642 because the old code here didn't suppress rtl generation. */
7643 gcc_assert (!no_rtl);
7648 fntype = TREE_TYPE (current_function_decl);
7650 /* For varargs, we do not want to skip the dummy va_dcl argument.
7651 For stdargs, we do want to skip the last named argument. */
7653 if (stdarg_p (fntype))
7654 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7657 if (cum->call_abi == MS_ABI)
7658 setup_incoming_varargs_ms_64 (&next_cum);
7660 setup_incoming_varargs_64 (&next_cum);
7663 /* Check whether TYPE is a va_list of kind char *. */
7666 is_va_list_char_pointer (tree type)
7670 /* For 32-bit it is always true. */
7673 canonic = ix86_canonical_va_list_type (type);
7674 return (canonic == ms_va_list_type_node
7675 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7678 /* Implement va_start. */
7681 ix86_va_start (tree valist, rtx nextarg)
7683 HOST_WIDE_INT words, n_gpr, n_fpr;
7684 tree f_gpr, f_fpr, f_ovf, f_sav;
7685 tree gpr, fpr, ovf, sav, t;
7689 if (flag_split_stack
7690 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7692 unsigned int scratch_regno;
7694 /* When we are splitting the stack, we can't refer to the stack
7695 arguments using internal_arg_pointer, because they may be on
7696 the old stack. The split stack prologue will arrange to
7697 leave a pointer to the old stack arguments in a scratch
7698 register, which we here copy to a pseudo-register. The split
7699 stack prologue can't set the pseudo-register directly because
7700 it (the prologue) runs before any registers have been saved. */
7702 scratch_regno = split_stack_prologue_scratch_regno ();
7703 if (scratch_regno != INVALID_REGNUM)
7707 reg = gen_reg_rtx (Pmode);
7708 cfun->machine->split_stack_varargs_pointer = reg;
7711 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7715 push_topmost_sequence ();
7716 emit_insn_after (seq, entry_of_function ());
7717 pop_topmost_sequence ();
7721 /* Only the 64bit target needs something special. */
7722 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7724 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7725 std_expand_builtin_va_start (valist, nextarg);
7730 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7731 next = expand_binop (ptr_mode, add_optab,
7732 cfun->machine->split_stack_varargs_pointer,
7733 crtl->args.arg_offset_rtx,
7734 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7735 convert_move (va_r, next, 0);
7740 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7741 f_fpr = DECL_CHAIN (f_gpr);
7742 f_ovf = DECL_CHAIN (f_fpr);
7743 f_sav = DECL_CHAIN (f_ovf);
7745 valist = build_simple_mem_ref (valist);
7746 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7747 /* The following should be folded into the MEM_REF offset. */
7748 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7750 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7752 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7754 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7757 /* Count number of gp and fp argument registers used. */
7758 words = crtl->args.info.words;
7759 n_gpr = crtl->args.info.regno;
7760 n_fpr = crtl->args.info.sse_regno;
7762 if (cfun->va_list_gpr_size)
7764 type = TREE_TYPE (gpr);
7765 t = build2 (MODIFY_EXPR, type,
7766 gpr, build_int_cst (type, n_gpr * 8));
7767 TREE_SIDE_EFFECTS (t) = 1;
7768 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7771 if (TARGET_SSE && cfun->va_list_fpr_size)
7773 type = TREE_TYPE (fpr);
7774 t = build2 (MODIFY_EXPR, type, fpr,
7775 build_int_cst (type, n_fpr * 16 + 8 * X86_64_REGPARM_MAX));
7776 TREE_SIDE_EFFECTS (t) = 1;
7777 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7780 /* Find the overflow area. */
7781 type = TREE_TYPE (ovf);
7782 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7783 ovf_rtx = crtl->args.internal_arg_pointer;
7785 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7786 t = make_tree (type, ovf_rtx);
7788 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7789 t = build2 (MODIFY_EXPR, type, ovf, t);
7790 TREE_SIDE_EFFECTS (t) = 1;
7791 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7793 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7795 /* Find the register save area.
7796 The function prologue saves it right above the stack frame. */
7797 type = TREE_TYPE (sav);
7798 t = make_tree (type, frame_pointer_rtx);
7799 if (!ix86_varargs_gpr_size)
7800 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7801 t = build2 (MODIFY_EXPR, type, sav, t);
7802 TREE_SIDE_EFFECTS (t) = 1;
7803 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7807 /* Implement va_arg. */
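/* For reference, the gimple emitted below follows the psABI fetch
   algorithm; roughly, for a single integer argument:

     if (ap->gp_offset < 6 * 8)
       {
         addr = (char *) ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;
       }
     else
       {
         addr = ap->overflow_arg_area;
         ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
       }
     return *(int *) addr;  */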
7810 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7813 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7814 tree f_gpr, f_fpr, f_ovf, f_sav;
7815 tree gpr, fpr, ovf, sav, t;
7817 tree lab_false, lab_over = NULL_TREE;
7822 enum machine_mode nat_mode;
7823 unsigned int arg_boundary;
7825 /* Only the 64bit target needs something special. */
7826 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7827 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7829 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7830 f_fpr = DECL_CHAIN (f_gpr);
7831 f_ovf = DECL_CHAIN (f_fpr);
7832 f_sav = DECL_CHAIN (f_ovf);
7834 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7835 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7836 valist = build_va_arg_indirect_ref (valist);
7837 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7838 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7839 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7841 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7843 type = build_pointer_type (type);
7844 size = int_size_in_bytes (type);
7845 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7847 nat_mode = type_natural_mode (type, NULL);
7856 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7857 if (!TARGET_64BIT_MS_ABI)
7864 container = construct_container (nat_mode, TYPE_MODE (type),
7865 type, 0, X86_64_REGPARM_MAX,
7866 X86_64_SSE_REGPARM_MAX, intreg,
7871 /* Pull the value out of the saved registers. */
7873 addr = create_tmp_var (ptr_type_node, "addr");
7877 int needed_intregs, needed_sseregs;
7879 tree int_addr, sse_addr;
7881 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7882 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7884 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7886 need_temp = (!REG_P (container)
7887 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7888 || TYPE_ALIGN (type) > 128));
7890 /* In case we are passing a structure, verify that it is a consecutive block
7891 in the register save area. If not, we need to do moves. */
7892 if (!need_temp && !REG_P (container))
7894 /* Verify that all registers are strictly consecutive. */
7895 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7899 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7901 rtx slot = XVECEXP (container, 0, i);
7902 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7903 || INTVAL (XEXP (slot, 1)) != i * 16)
7911 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7913 rtx slot = XVECEXP (container, 0, i);
7914 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7915 || INTVAL (XEXP (slot, 1)) != i * 8)
7927 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7928 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7931 /* First ensure that we fit completely in registers. */
7934 t = build_int_cst (TREE_TYPE (gpr),
7935 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7936 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7937 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7938 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7939 gimplify_and_add (t, pre_p);
7943 t = build_int_cst (TREE_TYPE (fpr),
7944 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7945 + X86_64_REGPARM_MAX * 8);
7946 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7947 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7948 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7949 gimplify_and_add (t, pre_p);
7952 /* Compute index to start of area used for integer regs. */
7955 /* int_addr = gpr + sav; */
7956 t = fold_build_pointer_plus (sav, gpr);
7957 gimplify_assign (int_addr, t, pre_p);
7961 /* sse_addr = fpr + sav; */
7962 t = fold_build_pointer_plus (sav, fpr);
7963 gimplify_assign (sse_addr, t, pre_p);
7967 int i, prev_size = 0;
7968 tree temp = create_tmp_var (type, "va_arg_tmp");
7971 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7972 gimplify_assign (addr, t, pre_p);
7974 for (i = 0; i < XVECLEN (container, 0); i++)
7976 rtx slot = XVECEXP (container, 0, i);
7977 rtx reg = XEXP (slot, 0);
7978 enum machine_mode mode = GET_MODE (reg);
7984 tree dest_addr, dest;
7985 int cur_size = GET_MODE_SIZE (mode);
7987 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7988 prev_size = INTVAL (XEXP (slot, 1));
7989 if (prev_size + cur_size > size)
7991 cur_size = size - prev_size;
7992 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7993 if (mode == BLKmode)
7996 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7997 if (mode == GET_MODE (reg))
7998 addr_type = build_pointer_type (piece_type);
8000 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8002 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8005 if (SSE_REGNO_P (REGNO (reg)))
8007 src_addr = sse_addr;
8008 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8012 src_addr = int_addr;
8013 src_offset = REGNO (reg) * 8;
8015 src_addr = fold_convert (addr_type, src_addr);
8016 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8018 dest_addr = fold_convert (daddr_type, addr);
8019 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8020 if (cur_size == GET_MODE_SIZE (mode))
8022 src = build_va_arg_indirect_ref (src_addr);
8023 dest = build_va_arg_indirect_ref (dest_addr);
8025 gimplify_assign (dest, src, pre_p);
8030 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8031 3, dest_addr, src_addr,
8032 size_int (cur_size));
8033 gimplify_and_add (copy, pre_p);
8035 prev_size += cur_size;
8041 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8042 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8043 gimplify_assign (gpr, t, pre_p);
8048 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8049 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8050 gimplify_assign (fpr, t, pre_p);
8053 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8055 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8058 /* ... otherwise out of the overflow area. */
8060 /* When we align a parameter on the stack for the caller, if its
8061 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8062 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
8063 with the caller. */
8064 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8065 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8066 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8068 /* Care for on-stack alignment if needed. */
8069 if (arg_boundary <= 64 || size == 0)
8073 HOST_WIDE_INT align = arg_boundary / 8;
8074 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8075 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8076 build_int_cst (TREE_TYPE (t), -align));
8079 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8080 gimplify_assign (addr, t, pre_p);
8082 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8083 gimplify_assign (unshare_expr (ovf), t, pre_p);
8086 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8088 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8089 addr = fold_convert (ptrtype, addr);
8092 addr = build_va_arg_indirect_ref (addr);
8093 return build_va_arg_indirect_ref (addr);
8096 /* Return true if OPNUM's MEM should be matched
8097 in movabs* patterns. */
8100 ix86_check_movabs (rtx insn, int opnum)
8104 set = PATTERN (insn);
8105 if (GET_CODE (set) == PARALLEL)
8106 set = XVECEXP (set, 0, 0);
8107 gcc_assert (GET_CODE (set) == SET);
8108 mem = XEXP (set, opnum);
8109 while (GET_CODE (mem) == SUBREG)
8110 mem = SUBREG_REG (mem);
8111 gcc_assert (MEM_P (mem));
8112 return volatile_ok || !MEM_VOLATILE_P (mem);
8115 /* Initialize the table of extra 80387 mathematical constants. */
8118 init_ext_80387_constants (void)
8120 static const char * cst[5] =
8122 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8123 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8124 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8125 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8126 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8130 for (i = 0; i < 5; i++)
8132 real_from_string (&ext_80387_constants_table[i], cst[i]);
8133 /* Ensure each constant is rounded to XFmode precision. */
8134 real_convert (&ext_80387_constants_table[i],
8135 XFmode, &ext_80387_constants_table[i]);
8138 ext_80387_constants_init = 1;
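/* Editorial note (not part of the original source): the five strings
   above are, in order, log10(2), ln(2), log2(e), log2(10) and pi,
   i.e. exactly the values loaded by the x87 fldlg2, fldln2, fldl2e,
   fldl2t and fldpi instructions named in the comments.  */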
8141 /* Return non-zero if the constant is something that
8142 can be loaded with a special instruction. */
8145 standard_80387_constant_p (rtx x)
8147 enum machine_mode mode = GET_MODE (x);
8151 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8154 if (x == CONST0_RTX (mode))
8156 if (x == CONST1_RTX (mode))
8159 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8161 /* For XFmode constants, try to find a special 80387 instruction when
8162 optimizing for size or on those CPUs that benefit from them. */
8164 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8168 if (! ext_80387_constants_init)
8169 init_ext_80387_constants ();
8171 for (i = 0; i < 5; i++)
8172 if (real_identical (&r, &ext_80387_constants_table[i]))
8176 /* A load of the constant -0.0 or -1.0 will be split into a
8177 fldz;fchs or fld1;fchs sequence. */
8178 if (real_isnegzero (&r))
8180 if (real_identical (&r, &dconstm1))
8186 /* Return the opcode of the special instruction to be used to load
8190 standard_80387_constant_opcode (rtx x)
8192 switch (standard_80387_constant_p (x))
8216 /* Return the CONST_DOUBLE representing the 80387 constant that is
8217 loaded by the specified special instruction. The argument IDX
8218 matches the return value from standard_80387_constant_p. */
8221 standard_80387_constant_rtx (int idx)
8225 if (! ext_80387_constants_init)
8226 init_ext_80387_constants ();
8242 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8246 /* Return 1 if X is all 0s and 2 if X is all 1s
8247 in a supported SSE/AVX vector mode. */
8250 standard_sse_constant_p (rtx x)
8252 enum machine_mode mode = GET_MODE (x);
8254 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8256 if (vector_all_ones_operand (x, mode))
8278 /* Return the opcode of the special instruction to be used to load
8282 standard_sse_constant_opcode (rtx insn, rtx x)
8284 switch (standard_sse_constant_p (x))
8287 switch (get_attr_mode (insn))
8290 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8291 return "%vpxor\t%0, %d0";
8293 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8294 return "%vxorpd\t%0, %d0";
8296 return "%vxorps\t%0, %d0";
8299 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8300 return "vpxor\t%x0, %x0, %x0";
8302 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8303 return "vxorpd\t%x0, %x0, %x0";
8305 return "vxorps\t%x0, %x0, %x0";
8313 return "vpcmpeqd\t%0, %0, %0";
8315 return "pcmpeqd\t%0, %0";
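/* Editorial note (not part of the original source): the xor forms
   above zero a register without a memory load, and the pcmpeqd forms
   rely on the fact that comparing a register with itself for equality
   sets every element to all 1s, so both standard SSE constants are
   materialized without touching the constant pool.  */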
8323 /* Returns true if OP contains a symbol reference. */
8326 symbolic_reference_mentioned_p (rtx op)
8331 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8334 fmt = GET_RTX_FORMAT (GET_CODE (op));
8335 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8341 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8342 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8346 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8353 /* Return true if it is appropriate to emit `ret' instructions in the
8354 body of a function. Do this only if the epilogue is simple, needing a
8355 couple of insns. Prior to reloading, we can't tell how many registers
8356 must be saved, so return false then. Return false if there is no frame
8357 marker to de-allocate. */
8360 ix86_can_use_return_insn_p (void)
8362 struct ix86_frame frame;
8364 if (! reload_completed || frame_pointer_needed)
8367 /* Don't allow more than 32k pop, since that's all we can do
8368 with one instruction. */
8369 if (crtl->args.pops_args && crtl->args.size >= 32768)
8372 ix86_compute_frame_layout (&frame);
8373 return (frame.stack_pointer_offset == UNITS_PER_WORD
8374 && (frame.nregs + frame.nsseregs) == 0);
8377 /* Value should be nonzero if functions must have frame pointers.
8378 Zero means the frame pointer need not be set up (and parms may
8379 be accessed via the stack pointer) in functions that seem suitable. */
8382 ix86_frame_pointer_required (void)
8384 /* If we accessed previous frames, then the generated code expects
8385 to be able to access the saved ebp value in our frame. */
8386 if (cfun->machine->accesses_prev_frame)
8389 /* Several x86 OSes need a frame pointer for other reasons,
8390 usually pertaining to setjmp. */
8391 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8394 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8395 turns off the frame pointer by default. Turn it back on now if
8396 we've not got a leaf function. */
8397 if (TARGET_OMIT_LEAF_FRAME_POINTER
8398 && (!current_function_is_leaf
8399 || ix86_current_function_calls_tls_descriptor))
8402 if (crtl->profile && !flag_fentry)
8408 /* Record that the current function accesses previous call frames. */
8411 ix86_setup_frame_addresses (void)
8413 cfun->machine->accesses_prev_frame = 1;
8416 #ifndef USE_HIDDEN_LINKONCE
8417 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8418 # define USE_HIDDEN_LINKONCE 1
8420 # define USE_HIDDEN_LINKONCE 0
8424 static int pic_labels_used;
8426 /* Fills in the label name that should be used for a pc thunk for
8427 the given register. */
8430 get_pc_thunk_name (char name[32], unsigned int regno)
8432 gcc_assert (!TARGET_64BIT);
8434 if (USE_HIDDEN_LINKONCE)
8435 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
8437 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8441 /* This function generates code for -fpic that loads %ebx with
8442 the return address of the caller and then returns. */
8445 ix86_code_end (void)
8450 for (regno = AX_REG; regno <= SP_REG; regno++)
8455 if (!(pic_labels_used & (1 << regno)))
8458 get_pc_thunk_name (name, regno);
8460 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8461 get_identifier (name),
8462 build_function_type_list (void_type_node, NULL_TREE));
8463 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8464 NULL_TREE, void_type_node);
8465 TREE_PUBLIC (decl) = 1;
8466 TREE_STATIC (decl) = 1;
8471 switch_to_section (darwin_sections[text_coal_section]);
8472 fputs ("\t.weak_definition\t", asm_out_file);
8473 assemble_name (asm_out_file, name);
8474 fputs ("\n\t.private_extern\t", asm_out_file);
8475 assemble_name (asm_out_file, name);
8476 putc ('\n', asm_out_file);
8477 ASM_OUTPUT_LABEL (asm_out_file, name);
8478 DECL_WEAK (decl) = 1;
8482 if (USE_HIDDEN_LINKONCE)
8484 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8486 targetm.asm_out.unique_section (decl, 0);
8487 switch_to_section (get_named_section (decl, NULL, 0));
8489 targetm.asm_out.globalize_label (asm_out_file, name);
8490 fputs ("\t.hidden\t", asm_out_file);
8491 assemble_name (asm_out_file, name);
8492 putc ('\n', asm_out_file);
8493 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8497 switch_to_section (text_section);
8498 ASM_OUTPUT_LABEL (asm_out_file, name);
8501 DECL_INITIAL (decl) = make_node (BLOCK);
8502 current_function_decl = decl;
8503 init_function_start (decl);
8504 first_function_block_is_cold = false;
8505 /* Make sure unwind info is emitted for the thunk if needed. */
8506 final_start_function (emit_barrier (), asm_out_file, 1);
8508 /* Pad stack IP move with 4 instructions (two NOPs count
8509 as one instruction). */
8510 if (TARGET_PAD_SHORT_FUNCTION)
8515 fputs ("\tnop\n", asm_out_file);
8518 xops[0] = gen_rtx_REG (Pmode, regno);
8519 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8520 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8521 fputs ("\tret\n", asm_out_file);
8522 final_end_function ();
8523 init_insn_lengths ();
8524 free_after_compilation (cfun);
8526 current_function_decl = NULL;
8529 if (flag_split_stack)
8530 file_end_indicate_split_stack ();
8533 /* Emit code for the SET_GOT patterns. */
8536 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8542 if (TARGET_VXWORKS_RTP && flag_pic)
8544 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8545 xops[2] = gen_rtx_MEM (Pmode,
8546 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8547 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8549 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8550 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8551 an unadorned address. */
8552 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8553 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8554 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8558 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8562 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8564 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8567 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8568 is what will be referenced by the Mach-O PIC subsystem. */
8570 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8573 targetm.asm_out.internal_label (asm_out_file, "L",
8574 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8579 get_pc_thunk_name (name, REGNO (dest));
8580 pic_labels_used |= 1 << REGNO (dest);
8582 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8583 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8584 output_asm_insn ("call\t%X2", xops);
8585 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8586 is what will be referenced by the Mach-O PIC subsystem. */
8589 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8591 targetm.asm_out.internal_label (asm_out_file, "L",
8592 CODE_LABEL_NUMBER (label));
8597 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
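/* For illustration only (an editorial sketch, not part of the original
   source): on ELF targets the usual -fpic entry sequence produced via
   the thunks above looks roughly like

       call  __x86.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk body is just "movl (%esp), %ebx; ret", leaving the
   GOT address in the PIC register.  */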
8602 /* Generate a "push" pattern for input ARG. */
8607 struct machine_function *m = cfun->machine;
8609 if (m->fs.cfa_reg == stack_pointer_rtx)
8610 m->fs.cfa_offset += UNITS_PER_WORD;
8611 m->fs.sp_offset += UNITS_PER_WORD;
8613 return gen_rtx_SET (VOIDmode,
8615 gen_rtx_PRE_DEC (Pmode,
8616 stack_pointer_rtx)),
8620 /* Generate a "pop" pattern for input ARG. */
8625 return gen_rtx_SET (VOIDmode,
8628 gen_rtx_POST_INC (Pmode,
8629 stack_pointer_rtx)));
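/* Editorial sketch (not part of the original source): the two helpers
   above produce RTL of the form

     (set (mem (pre_dec (reg sp))) (reg arg))    ; gen_push
     (set (reg arg) (mem (post_inc (reg sp))))   ; gen_pop

   which the move patterns render as plain push/pop instructions.  */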
8632 /* Return >= 0 if there is an unused call-clobbered register available
8633 for the entire function. */
8636 ix86_select_alt_pic_regnum (void)
8638 if (current_function_is_leaf
8640 && !ix86_current_function_calls_tls_descriptor)
8643 /* Can't use the same register for both PIC and DRAP. */
8645 drap = REGNO (crtl->drap_reg);
8648 for (i = 2; i >= 0; --i)
8649 if (i != drap && !df_regs_ever_live_p (i))
8653 return INVALID_REGNUM;
8656 /* Return TRUE if we need to save REGNO. */
8659 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8661 if (pic_offset_table_rtx
8662 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8663 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8665 || crtl->calls_eh_return
8666 || crtl->uses_const_pool))
8667 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8669 if (crtl->calls_eh_return && maybe_eh_return)
8674 unsigned test = EH_RETURN_DATA_REGNO (i);
8675 if (test == INVALID_REGNUM)
8682 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8685 return (df_regs_ever_live_p (regno)
8686 && !call_used_regs[regno]
8687 && !fixed_regs[regno]
8688 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8691 /* Return the number of saved general purpose registers. */
8694 ix86_nsaved_regs (void)
8699 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8700 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8705 /* Return the number of saved SSE registers. */
8708 ix86_nsaved_sseregs (void)
8713 if (!TARGET_64BIT_MS_ABI)
8715 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8716 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8721 /* Given FROM and TO register numbers, say whether this elimination is
8722 allowed. If stack alignment is needed, we can only replace argument
8723 pointer with hard frame pointer, or replace frame pointer with stack
8724 pointer. Otherwise, frame pointer elimination is automatically
8725 handled and all other eliminations are valid. */
8728 ix86_can_eliminate (const int from, const int to)
8730 if (stack_realign_fp)
8731 return ((from == ARG_POINTER_REGNUM
8732 && to == HARD_FRAME_POINTER_REGNUM)
8733 || (from == FRAME_POINTER_REGNUM
8734 && to == STACK_POINTER_REGNUM));
8736 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8739 /* Return the offset between two registers, one to be eliminated, and the other
8740 its replacement, at the start of a routine. */
8743 ix86_initial_elimination_offset (int from, int to)
8745 struct ix86_frame frame;
8746 ix86_compute_frame_layout (&frame);
8748 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8749 return frame.hard_frame_pointer_offset;
8750 else if (from == FRAME_POINTER_REGNUM
8751 && to == HARD_FRAME_POINTER_REGNUM)
8752 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8755 gcc_assert (to == STACK_POINTER_REGNUM);
8757 if (from == ARG_POINTER_REGNUM)
8758 return frame.stack_pointer_offset;
8760 gcc_assert (from == FRAME_POINTER_REGNUM);
8761 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8765 /* In a dynamically-aligned function, we can't know the offset from
8766 stack pointer to frame pointer, so we must ensure that setjmp
8767 eliminates fp against the hard fp (%ebp) rather than trying to
8768 index from %esp up to the top of the frame across a gap that is
8769 of unknown (at compile-time) size. */
8771 ix86_builtin_setjmp_frame_value (void)
8773 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8776 /* When using -fsplit-stack, the allocation routines set a field in
8777 the TCB to the bottom of the stack plus this much space, measured
8780 #define SPLIT_STACK_AVAILABLE 256
8782 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
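/* Editorial sketch (not part of the original source): the offsets
   computed below start at the word holding the return address and
   grow toward the bottom of the frame, roughly

       return address                <- UNITS_PER_WORD
       [pushed static chain]
       [saved frame pointer]         <- hard_frame_pointer_offset
       saved GP registers            <- reg_save_offset
       saved SSE registers           <- sse_reg_save_offset (16-byte aligned)
       va_arg register save area
       local variables               <- frame_pointer_offset
       outgoing arguments            <- stack_pointer_offset

   so stack_pointer_offset is in effect the full frame size.  */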
8785 ix86_compute_frame_layout (struct ix86_frame *frame)
8787 unsigned int stack_alignment_needed;
8788 HOST_WIDE_INT offset;
8789 unsigned int preferred_alignment;
8790 HOST_WIDE_INT size = get_frame_size ();
8791 HOST_WIDE_INT to_allocate;
8793 frame->nregs = ix86_nsaved_regs ();
8794 frame->nsseregs = ix86_nsaved_sseregs ();
8796 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8797 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8799 /* The 64-bit MS ABI seems to require the stack alignment to always be
8800 16, except for function prologues and leaf functions. */
8801 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8802 && (!current_function_is_leaf || cfun->calls_alloca != 0
8803 || ix86_current_function_calls_tls_descriptor))
8805 preferred_alignment = 16;
8806 stack_alignment_needed = 16;
8807 crtl->preferred_stack_boundary = 128;
8808 crtl->stack_alignment_needed = 128;
8811 gcc_assert (!size || stack_alignment_needed);
8812 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8813 gcc_assert (preferred_alignment <= stack_alignment_needed);
8815 /* For SEH we have to limit the amount of code movement into the prologue.
8816 At present we do this via a BLOCKAGE, at which point there's very little
8817 scheduling that can be done, which means that there's very little point
8818 in doing anything except PUSHs. */
8820 cfun->machine->use_fast_prologue_epilogue = false;
8822 /* During reload iteration the number of saved registers can change.
8823 Recompute the value as needed. Do not recompute when the number of
8824 registers didn't change, as reload makes multiple calls to this
8825 function and does not expect the decision to change within a single iteration. */
8826 else if (!optimize_function_for_size_p (cfun)
8827 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8829 int count = frame->nregs;
8830 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8832 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8834 /* The fast prologue uses moves instead of pushes to save registers. This
8835 is significantly longer, but it also executes faster, as modern hardware
8836 can execute the moves in parallel but can't do that for push/pop.
8838 Be careful when choosing which prologue to emit: when the function takes
8839 many instructions to execute, we may as well use the slow version, and
8840 likewise when the function is known to be outside a hot spot (this is
8841 known with feedback only). Weight the size of the function by the number
8842 of registers to save, as it is cheap to use one or two push instructions
8843 but very slow to use many of them. */
8845 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8846 if (node->frequency < NODE_FREQUENCY_NORMAL
8847 || (flag_branch_probabilities
8848 && node->frequency < NODE_FREQUENCY_HOT))
8849 cfun->machine->use_fast_prologue_epilogue = false;
8851 cfun->machine->use_fast_prologue_epilogue
8852 = !expensive_function_p (count);
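/* Editorial note (not part of the original source): the scaling above
   means each saved register raises the threshold passed to
   expensive_function_p by FAST_PROLOGUE_INSN_COUNT, so the more
   registers there are to save, the more likely the move-based fast
   prologue is chosen over a long, slow sequence of pushes.  */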
8855 frame->save_regs_using_mov
8856 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
8857 /* If static stack checking is enabled and done with probes,
8858 the registers need to be saved before allocating the frame. */
8859 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
8861 /* Skip return address. */
8862 offset = UNITS_PER_WORD;
8864 /* Skip pushed static chain. */
8865 if (ix86_static_chain_on_stack)
8866 offset += UNITS_PER_WORD;
8868 /* Skip saved base pointer. */
8869 if (frame_pointer_needed)
8870 offset += UNITS_PER_WORD;
8871 frame->hfp_save_offset = offset;
8873 /* The traditional frame pointer location is at the top of the frame. */
8874 frame->hard_frame_pointer_offset = offset;
8876 /* Register save area */
8877 offset += frame->nregs * UNITS_PER_WORD;
8878 frame->reg_save_offset = offset;
8880 /* Align and set SSE register save area. */
8881 if (frame->nsseregs)
8883 /* The only ABI that has saved SSE registers (Win64) also has a
8884 16-byte aligned default stack, and thus we don't need to be
8885 within the re-aligned local stack frame to save them. */
8886 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8887 offset = (offset + 16 - 1) & -16;
8888 offset += frame->nsseregs * 16;
8890 frame->sse_reg_save_offset = offset;
8892 /* The re-aligned stack starts here. Values before this point are not
8893 directly comparable with values below this point. In order to make
8894 sure that no value happens to be the same before and after, force
8895 the alignment computation below to add a non-zero value. */
8896 if (stack_realign_fp)
8897 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8900 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8901 offset += frame->va_arg_size;
8903 /* Align start of frame for local function. */
8904 if (stack_realign_fp
8905 || offset != frame->sse_reg_save_offset
8907 || !current_function_is_leaf
8908 || cfun->calls_alloca
8909 || ix86_current_function_calls_tls_descriptor)
8910 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8912 /* Frame pointer points here. */
8913 frame->frame_pointer_offset = offset;
8917 /* Add the outgoing arguments area. It can be skipped if we eliminated
8918 all the function calls as dead code.
8919 Skipping is however impossible when the function calls alloca, as the
8920 alloca expander assumes that the last crtl->outgoing_args_size bytes
8921 of the stack frame are unused. */
8922 if (ACCUMULATE_OUTGOING_ARGS
8923 && (!current_function_is_leaf || cfun->calls_alloca
8924 || ix86_current_function_calls_tls_descriptor))
8926 offset += crtl->outgoing_args_size;
8927 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8930 frame->outgoing_arguments_size = 0;
8932 /* Align stack boundary. Only needed if we're calling another function
8934 if (!current_function_is_leaf || cfun->calls_alloca
8935 || ix86_current_function_calls_tls_descriptor)
8936 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8938 /* We've reached the end of the stack frame. */
8939 frame->stack_pointer_offset = offset;
8941 /* The size the prologue needs to allocate. */
8942 to_allocate = offset - frame->sse_reg_save_offset;
8944 if ((!to_allocate && frame->nregs <= 1)
8945 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8946 frame->save_regs_using_mov = false;
8948 if (ix86_using_red_zone ()
8949 && current_function_sp_is_unchanging
8950 && current_function_is_leaf
8951 && !ix86_current_function_calls_tls_descriptor)
8953 frame->red_zone_size = to_allocate;
8954 if (frame->save_regs_using_mov)
8955 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8956 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8957 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8960 frame->red_zone_size = 0;
8961 frame->stack_pointer_offset -= frame->red_zone_size;
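/* Editorial note (not part of the original source): the red zone is
   the 128-byte area below the stack pointer that the x86-64 ABI
   guarantees is not clobbered asynchronously; a leaf function whose
   allocation fits in RED_ZONE_SIZE - RED_ZONE_RESERVE bytes can keep
   its data there and skip the stack adjustment entirely.  */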
8963 /* The SEH frame pointer location is near the bottom of the frame.
8964 This is enforced by the fact that the difference between the
8965 stack pointer and the frame pointer is limited to 240 bytes in
8966 the unwind data structure. */
8971 /* If we can leave the frame pointer where it is, do so. */
8972 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
8973 if (diff > 240 || (diff & 15) != 0)
8975 /* Ideally we'd determine what portion of the local stack frame
8976 (within the constraint of the lowest 240) is most heavily used.
8977 But without that complication, simply bias the frame pointer
8978 by 128 bytes so as to maximize the amount of the local stack
8979 frame that is addressable with 8-bit offsets. */
8980 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
8985 /* This is semi-inlined memory_address_length, but simplified
8986 since we know that we're always dealing with reg+offset, and
8987 to avoid having to create and discard all that rtl. */
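/* Worked example (editorial, not part of the original source): the
   length computed below counts address bytes beyond the ModRM byte.
   E.g. 0(%rax) costs 0, 0(%rbp) costs 1 (a displacement byte is
   mandatory), 0(%rsp) costs 1 (SIB byte), 200(%rax) costs 4 (32-bit
   displacement) and 200(%rsp) costs 5.  */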
8990 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8996 /* EBP and R13 cannot be encoded without an offset. */
8997 len = (regno == BP_REG || regno == R13_REG);
8999 else if (IN_RANGE (offset, -128, 127))
9002 /* ESP and R12 must be encoded with a SIB byte. */
9003 if (regno == SP_REG || regno == R12_REG)
9009 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9010 The valid base registers are taken from CFUN->MACHINE->FS. */
9013 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9015 const struct machine_function *m = cfun->machine;
9016 rtx base_reg = NULL;
9017 HOST_WIDE_INT base_offset = 0;
9019 if (m->use_fast_prologue_epilogue)
9021 /* Choose the base register most likely to allow the most scheduling
9022 opportunities. Generally FP is valid throughout the function,
9023 while DRAP must be reloaded within the epilogue. But choose either
9024 over the SP due to increased encoding size. */
9028 base_reg = hard_frame_pointer_rtx;
9029 base_offset = m->fs.fp_offset - cfa_offset;
9031 else if (m->fs.drap_valid)
9033 base_reg = crtl->drap_reg;
9034 base_offset = 0 - cfa_offset;
9036 else if (m->fs.sp_valid)
9038 base_reg = stack_pointer_rtx;
9039 base_offset = m->fs.sp_offset - cfa_offset;
9044 HOST_WIDE_INT toffset;
9047 /* Choose the base register with the smallest address encoding.
9048 With a tie, choose FP > DRAP > SP. */
9051 base_reg = stack_pointer_rtx;
9052 base_offset = m->fs.sp_offset - cfa_offset;
9053 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9055 if (m->fs.drap_valid)
9057 toffset = 0 - cfa_offset;
9058 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9061 base_reg = crtl->drap_reg;
9062 base_offset = toffset;
9068 toffset = m->fs.fp_offset - cfa_offset;
9069 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9072 base_reg = hard_frame_pointer_rtx;
9073 base_offset = toffset;
9078 gcc_assert (base_reg != NULL);
9080 return plus_constant (base_reg, base_offset);
9083 /* Emit code to save registers in the prologue. */
9086 ix86_emit_save_regs (void)
9091 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9092 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9094 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9095 RTX_FRAME_RELATED_P (insn) = 1;
9099 /* Emit a single register save at CFA - CFA_OFFSET. */
9102 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9103 HOST_WIDE_INT cfa_offset)
9105 struct machine_function *m = cfun->machine;
9106 rtx reg = gen_rtx_REG (mode, regno);
9107 rtx mem, addr, base, insn;
9109 addr = choose_baseaddr (cfa_offset);
9110 mem = gen_frame_mem (mode, addr);
9112 /* For SSE saves, we need to indicate the 128-bit alignment. */
9113 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9115 insn = emit_move_insn (mem, reg);
9116 RTX_FRAME_RELATED_P (insn) = 1;
9119 if (GET_CODE (base) == PLUS)
9120 base = XEXP (base, 0);
9121 gcc_checking_assert (REG_P (base));
9123 /* When saving registers into a re-aligned local stack frame, avoid
9124 any tricky guessing by dwarf2out. */
9125 if (m->fs.realigned)
9127 gcc_checking_assert (stack_realign_drap);
9129 if (regno == REGNO (crtl->drap_reg))
9131 /* A bit of a hack. We force the DRAP register to be saved in
9132 the re-aligned stack frame, which provides us with a copy
9133 of the CFA that will last past the prologue. Install it. */
9134 gcc_checking_assert (cfun->machine->fs.fp_valid);
9135 addr = plus_constant (hard_frame_pointer_rtx,
9136 cfun->machine->fs.fp_offset - cfa_offset);
9137 mem = gen_rtx_MEM (mode, addr);
9138 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9142 /* The frame pointer is a stable reference within the
9143 aligned frame. Use it. */
9144 gcc_checking_assert (cfun->machine->fs.fp_valid);
9145 addr = plus_constant (hard_frame_pointer_rtx,
9146 cfun->machine->fs.fp_offset - cfa_offset);
9147 mem = gen_rtx_MEM (mode, addr);
9148 add_reg_note (insn, REG_CFA_EXPRESSION,
9149 gen_rtx_SET (VOIDmode, mem, reg));
9153 /* The memory may not be relative to the current CFA register,
9154 which means that we may need to generate a new pattern for
9155 use by the unwind info. */
9156 else if (base != m->fs.cfa_reg)
9158 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9159 mem = gen_rtx_MEM (mode, addr);
9160 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9164 /* Emit code to save registers using MOV insns.
9165 First register is stored at CFA - CFA_OFFSET. */
9167 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9171 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9172 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9174 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9175 cfa_offset -= UNITS_PER_WORD;
9179 /* Emit code to save SSE registers using MOV insns.
9180 First register is stored at CFA - CFA_OFFSET. */
9182 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9186 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9187 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9189 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9194 static GTY(()) rtx queued_cfa_restores;
9196 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9197 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9198 Don't add the note if the previously saved value will be left untouched
9199 within the stack red zone until return, as unwinders can find the same value
9200 in the register and on the stack. */
9203 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9205 if (!crtl->shrink_wrapped
9206 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9211 add_reg_note (insn, REG_CFA_RESTORE, reg);
9212 RTX_FRAME_RELATED_P (insn) = 1;
9216 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9219 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9222 ix86_add_queued_cfa_restore_notes (rtx insn)
9225 if (!queued_cfa_restores)
9227 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9229 XEXP (last, 1) = REG_NOTES (insn);
9230 REG_NOTES (insn) = queued_cfa_restores;
9231 queued_cfa_restores = NULL_RTX;
9232 RTX_FRAME_RELATED_P (insn) = 1;
9235 /* Expand prologue or epilogue stack adjustment.
9236 The pattern exists to put a dependency on all ebp-based memory accesses.
9237 STYLE should be negative if instructions should be marked as frame related,
9238 zero if the %r11 register is live and cannot be freely used, and positive
9242 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9243 int style, bool set_cfa)
9245 struct machine_function *m = cfun->machine;
9247 bool add_frame_related_expr = false;
9250 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9251 else if (x86_64_immediate_operand (offset, DImode))
9252 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9256 /* r11 is used by indirect sibcall return as well, set before the
9257 epilogue and used after the epilogue. */
9259 tmp = gen_rtx_REG (DImode, R11_REG);
9262 gcc_assert (src != hard_frame_pointer_rtx
9263 && dest != hard_frame_pointer_rtx);
9264 tmp = hard_frame_pointer_rtx;
9266 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9268 add_frame_related_expr = true;
9270 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9273 insn = emit_insn (insn);
9275 ix86_add_queued_cfa_restore_notes (insn);
9281 gcc_assert (m->fs.cfa_reg == src);
9282 m->fs.cfa_offset += INTVAL (offset);
9283 m->fs.cfa_reg = dest;
9285 r = gen_rtx_PLUS (Pmode, src, offset);
9286 r = gen_rtx_SET (VOIDmode, dest, r);
9287 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9288 RTX_FRAME_RELATED_P (insn) = 1;
9292 RTX_FRAME_RELATED_P (insn) = 1;
9293 if (add_frame_related_expr)
9295 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9296 r = gen_rtx_SET (VOIDmode, dest, r);
9297 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9301 if (dest == stack_pointer_rtx)
9303 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9304 bool valid = m->fs.sp_valid;
9306 if (src == hard_frame_pointer_rtx)
9308 valid = m->fs.fp_valid;
9309 ooffset = m->fs.fp_offset;
9311 else if (src == crtl->drap_reg)
9313 valid = m->fs.drap_valid;
9318 /* Else there are two possibilities: SP itself, which we set
9319 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9320 taken care of by hand along the eh_return path. */
9321 gcc_checking_assert (src == stack_pointer_rtx
9322 || offset == const0_rtx);
9325 m->fs.sp_offset = ooffset - INTVAL (offset);
9326 m->fs.sp_valid = valid;
9330 /* Find an available register to be used as a dynamic realign argument
9331 pointer register. Such a register will be written to in the prologue
9332 and used at the beginning of the body, so it must not be
9333 1. a parameter passing register.
9335 We reuse the static-chain register if it is available. Otherwise, we
9336 use DI for i386 and R13 for x86-64. We chose R13 since it has
9339 Return: the regno of the chosen register. */
9342 find_drap_reg (void)
9344 tree decl = cfun->decl;
9348 /* Use R13 for a nested function or a function that needs a static
9349 chain. Since a function with a tail call may use any caller-saved
9350 registers in its epilogue, the DRAP must not use a caller-saved
9351 register in such a case. */
9352 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9359 /* Use DI for a nested function or a function that needs a static
9360 chain. Since a function with a tail call may use any caller-saved
9361 registers in its epilogue, the DRAP must not use a caller-saved
9362 register in such a case. */
9363 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9366 /* Reuse the static chain register if it isn't used for parameter
9368 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9370 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9371 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9378 /* Return minimum incoming stack alignment. */
9381 ix86_minimum_incoming_stack_boundary (bool sibcall)
9383 unsigned int incoming_stack_boundary;
9385 /* Prefer the one specified at command line. */
9386 if (ix86_user_incoming_stack_boundary)
9387 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9388 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9389 when -mstackrealign is used, this isn't a sibcall check, and the
9390 estimated stack alignment is 128 bits. */
9393 && ix86_force_align_arg_pointer
9394 && crtl->stack_alignment_estimated == 128)
9395 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9397 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9399 /* Incoming stack alignment can be changed on individual functions
9400 via the force_align_arg_pointer attribute. We use the smallest
9401 incoming stack boundary. */
9402 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9403 && lookup_attribute (ix86_force_align_arg_pointer_string,
9404 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9405 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9407 /* The incoming stack frame has to be aligned at least at
9408 parm_stack_boundary. */
9409 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9410 incoming_stack_boundary = crtl->parm_stack_boundary;
9412 /* The stack at the entry of main is aligned by the runtime. We use the
9413 smallest incoming stack boundary. */
9414 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9415 && DECL_NAME (current_function_decl)
9416 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9417 && DECL_FILE_SCOPE_P (current_function_decl))
9418 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9420 return incoming_stack_boundary;
9423 /* Update incoming stack boundary and estimated stack alignment. */
9426 ix86_update_stack_boundary (void)
9428 ix86_incoming_stack_boundary
9429 = ix86_minimum_incoming_stack_boundary (false);
9431 /* x86_64 varargs need 16-byte stack alignment for the register save
9435 && crtl->stack_alignment_estimated < 128)
9436 crtl->stack_alignment_estimated = 128;
9439 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9440 needed or an rtx for DRAP otherwise. */
9443 ix86_get_drap_rtx (void)
9445 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9446 crtl->need_drap = true;
9448 if (stack_realign_drap)
9450 /* Assign DRAP to vDRAP and return vDRAP. */
9451 unsigned int regno = find_drap_reg ();
9456 arg_ptr = gen_rtx_REG (Pmode, regno);
9457 crtl->drap_reg = arg_ptr;
9460 drap_vreg = copy_to_reg (arg_ptr);
9464 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9467 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9468 RTX_FRAME_RELATED_P (insn) = 1;
9476 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9479 ix86_internal_arg_pointer (void)
9481 return virtual_incoming_args_rtx;
9484 struct scratch_reg {
9489 /* Return a short-lived scratch register for use on function entry.
9490 In 32-bit mode, it is valid only after the registers are saved
9491 in the prologue. This register must be released by means of
9492 release_scratch_register_on_entry once it is dead. */
9495 get_scratch_register_on_entry (struct scratch_reg *sr)
9503 /* We always use R11 in 64-bit mode. */
9508 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9510 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9511 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9512 int regparm = ix86_function_regparm (fntype, decl);
9514 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9516 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9517 for the static chain register. */
9518 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9519 && drap_regno != AX_REG)
9521 else if (regparm < 2 && drap_regno != DX_REG)
9523 /* ecx is the static chain register. */
9524 else if (regparm < 3 && !fastcall_p && !static_chain_p
9525 && drap_regno != CX_REG)
9527 else if (ix86_save_reg (BX_REG, true))
9529 /* esi is the static chain register. */
9530 else if (!(regparm == 3 && static_chain_p)
9531 && ix86_save_reg (SI_REG, true))
9533 else if (ix86_save_reg (DI_REG, true))
9537 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9542 sr->reg = gen_rtx_REG (Pmode, regno);
9545 rtx insn = emit_insn (gen_push (sr->reg));
9546 RTX_FRAME_RELATED_P (insn) = 1;
9550 /* Release a scratch register obtained from the preceding function. */
9553 release_scratch_register_on_entry (struct scratch_reg *sr)
9557 rtx x, insn = emit_insn (gen_pop (sr->reg));
9559 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9560 RTX_FRAME_RELATED_P (insn) = 1;
9561 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9562 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9563 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9567 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
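/* Editorial note (not part of the original source): with the default
   STACK_CHECK_PROBE_INTERVAL_EXP of 12 this is 4096 bytes, so the
   probes below touch the stack once per page.  */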
9569 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9572 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9574 /* We skip the probe for the first interval + a small dope of 4 words and
9575 probe that many bytes past the specified size to maintain a protection
9576 area at the bottom of the stack. */
9577 const int dope = 4 * UNITS_PER_WORD;
9578 rtx size_rtx = GEN_INT (size), last;
9580 /* See if we have a constant small number of probes to generate. If so,
9581 that's the easy case. The run-time loop is made up of 11 insns in the
9582 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9583 for n # of intervals. */
9584 if (size <= 5 * PROBE_INTERVAL)
9586 HOST_WIDE_INT i, adjust;
9587 bool first_probe = true;
9589 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9590 values of N from 1 until it exceeds SIZE. If only one probe is
9591 needed, this will not generate any code. Then adjust and probe
9592 to PROBE_INTERVAL + SIZE. */
9593 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9597 adjust = 2 * PROBE_INTERVAL + dope;
9598 first_probe = false;
9601 adjust = PROBE_INTERVAL;
9603 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9604 plus_constant (stack_pointer_rtx, -adjust)));
9605 emit_stack_probe (stack_pointer_rtx);
9609 adjust = size + PROBE_INTERVAL + dope;
9611 adjust = size + PROBE_INTERVAL - i;
9613 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9614 plus_constant (stack_pointer_rtx, -adjust)));
9615 emit_stack_probe (stack_pointer_rtx);
9617 /* Adjust back to account for the additional first interval. */
9618 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9619 plus_constant (stack_pointer_rtx,
9620 PROBE_INTERVAL + dope)));
9623 /* Otherwise, do the same as above, but in a loop. Note that we must be
9624 extra careful with variables wrapping around because we might be at
9625 the very top (or the very bottom) of the address space and we have
9626 to be able to handle this case properly; in particular, we use an
9627 equality test for the loop condition. */
9630 HOST_WIDE_INT rounded_size;
9631 struct scratch_reg sr;
9633 get_scratch_register_on_entry (&sr);
9636 /* Step 1: round SIZE to the previous multiple of the interval. */
9638 rounded_size = size & -PROBE_INTERVAL;
9641 /* Step 2: compute initial and final value of the loop counter. */
9643 /* SP = SP_0 + PROBE_INTERVAL. */
9644 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9645 plus_constant (stack_pointer_rtx,
9646 - (PROBE_INTERVAL + dope))));
9648 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9649 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9650 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9651 gen_rtx_PLUS (Pmode, sr.reg,
9652 stack_pointer_rtx)));
9657 while (SP != LAST_ADDR)
9659 SP = SP + PROBE_INTERVAL
9663 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9664 values of N from 1 until it is equal to ROUNDED_SIZE. */
9666 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9669 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9670 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9672 if (size != rounded_size)
9674 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9675 plus_constant (stack_pointer_rtx,
9676 rounded_size - size)));
9677 emit_stack_probe (stack_pointer_rtx);
9680 /* Adjust back to account for the additional first interval. */
9681 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9682 plus_constant (stack_pointer_rtx,
9683 PROBE_INTERVAL + dope)));
9685 release_scratch_register_on_entry (&sr);
9688 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9690 /* Even if the stack pointer isn't the CFA register, we need to correctly
9691 describe the adjustments made to it, in particular differentiate the
9692 frame-related ones from the frame-unrelated ones. */
9695 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9696 XVECEXP (expr, 0, 0)
9697 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9698 plus_constant (stack_pointer_rtx, -size));
9699 XVECEXP (expr, 0, 1)
9700 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9701 plus_constant (stack_pointer_rtx,
9702 PROBE_INTERVAL + dope + size));
9703 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9704 RTX_FRAME_RELATED_P (last) = 1;
9706 cfun->machine->fs.sp_offset += size;
9709 /* Make sure nothing is scheduled before we are done. */
9710 emit_insn (gen_blockage ());
9713 /* Adjust the stack pointer up to REG while probing it. */
9716 output_adjust_stack_and_probe (rtx reg)
9718 static int labelno = 0;
9719 char loop_lab[32], end_lab[32];
9722 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9723 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9725 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9727 /* Jump to END_LAB if SP == LAST_ADDR. */
9728 xops[0] = stack_pointer_rtx;
9730 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9731 fputs ("\tje\t", asm_out_file);
9732 assemble_name_raw (asm_out_file, end_lab);
9733 fputc ('\n', asm_out_file);
9735 /* SP = SP + PROBE_INTERVAL. */
9736 xops[1] = GEN_INT (PROBE_INTERVAL);
9737 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9740 xops[1] = const0_rtx;
9741 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9743 fprintf (asm_out_file, "\tjmp\t");
9744 assemble_name_raw (asm_out_file, loop_lab);
9745 fputc ('\n', asm_out_file);
9747 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
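/* For illustration only (an editorial sketch, not part of the original
   source): on x86-64, where the scratch register is %r11, the loop
   emitted above looks roughly like

       .LPSRL0: cmpq  %r11, %rsp
                je    .LPSRE0
                subq  $4096, %rsp
                orq   $0, (%rsp)
                jmp   .LPSRL0
       .LPSRE0:

   moving the stack pointer down one probe interval at a time until it
   reaches the precomputed last address.  */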
9752 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9753 inclusive. These are offsets from the current stack pointer. */
9756 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9758 /* See if we have a constant small number of probes to generate. If so,
9759 that's the easy case. The run-time loop is made up of 7 insns in the
9760 generic case while the compile-time loop is made up of n insns for n #
9762 if (size <= 7 * PROBE_INTERVAL)
9766 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9767 it exceeds SIZE. If only one probe is needed, this will not
9768 generate any code. Then probe at FIRST + SIZE. */
9769 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9770 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9772 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9775 /* Otherwise, do the same as above, but in a loop. Note that we must be
9776 extra careful with variables wrapping around because we might be at
9777 the very top (or the very bottom) of the address space and we have
9778 to be able to handle this case properly; in particular, we use an
9779 equality test for the loop condition. */
9782 HOST_WIDE_INT rounded_size, last;
9783 struct scratch_reg sr;
9785 get_scratch_register_on_entry (&sr);
9788 /* Step 1: round SIZE to the previous multiple of the interval. */
9790 rounded_size = size & -PROBE_INTERVAL;
9793 /* Step 2: compute initial and final value of the loop counter. */
9795 /* TEST_OFFSET = FIRST. */
9796 emit_move_insn (sr.reg, GEN_INT (-first));
9798 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9799 last = first + rounded_size;
9804 while (TEST_ADDR != LAST_ADDR)
9806 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9810 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9811 until it is equal to ROUNDED_SIZE. */
9813 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9816 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9817 that SIZE is equal to ROUNDED_SIZE. */
9819 if (size != rounded_size)
9820 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9823 rounded_size - size));
9825 release_scratch_register_on_entry (&sr);
9828 /* Make sure nothing is scheduled before we are done. */
9829 emit_insn (gen_blockage ());
9832 /* Probe a range of stack addresses from REG to END, inclusive. These are
9833 offsets from the current stack pointer. */
9836 output_probe_stack_range (rtx reg, rtx end)
9838 static int labelno = 0;
9839 char loop_lab[32], end_lab[32];
9842 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9843 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9845 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9847 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9850 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9851 fputs ("\tje\t", asm_out_file);
9852 assemble_name_raw (asm_out_file, end_lab);
9853 fputc ('\n', asm_out_file);
9855 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9856 xops[1] = GEN_INT (PROBE_INTERVAL);
9857 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9859 /* Probe at TEST_ADDR. */
9860 xops[0] = stack_pointer_rtx;
9862 xops[2] = const0_rtx;
9863 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9865 fprintf (asm_out_file, "\tjmp\t");
9866 assemble_name_raw (asm_out_file, loop_lab);
9867 fputc ('\n', asm_out_file);
9869 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
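/* Editorial note (not part of the original source): both probe loops
   use "or $0, MEM" as the probe: a read-modify-write that touches the
   guard page, and therefore triggers stack growth, without changing
   the contents of the probed word.  */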
9874 /* Finalize the stack_realign_needed flag, which will guide the
9875 prologue/epilogue to be generated in the correct form. */
9877 ix86_finalize_stack_realign_flags (void)
9879 /* Check if stack realignment is really needed after reload, and
9880 store the result in cfun. */
9881 unsigned int incoming_stack_boundary
9882 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9883 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9884 unsigned int stack_realign = (incoming_stack_boundary
9885 < (current_function_is_leaf
9886 ? crtl->max_used_stack_slot_alignment
9887 : crtl->stack_alignment_needed));
9889 if (crtl->stack_realign_finalized)
9891 /* After stack_realign_needed is finalized, we can no longer
9893 gcc_assert (crtl->stack_realign_needed == stack_realign);
9897 crtl->stack_realign_needed = stack_realign;
9898 crtl->stack_realign_finalized = true;
9902 /* Expand the prologue into a bunch of separate insns. */
9905 ix86_expand_prologue (void)
9907 struct machine_function *m = cfun->machine;
9910 struct ix86_frame frame;
9911 HOST_WIDE_INT allocate;
9912 bool int_registers_saved;
9914 ix86_finalize_stack_realign_flags ();
9916 /* DRAP should not coexist with stack_realign_fp */
9917 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9919 memset (&m->fs, 0, sizeof (m->fs));
9921 /* Initialize CFA state for before the prologue. */
9922 m->fs.cfa_reg = stack_pointer_rtx;
9923 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9925 /* Track SP offset to the CFA. We continue tracking this after we've
9926 swapped the CFA register away from SP. In the case of re-alignment
9927 this is fudged; we're interested in offsets within the local frame. */
9928 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9929 m->fs.sp_valid = true;
9931 ix86_compute_frame_layout (&frame);
9933 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9935 /* We should have already generated an error for any use of
9936 ms_hook on a nested function. */
9937 gcc_checking_assert (!ix86_static_chain_on_stack);
9939 /* Check if profiling is active and we shall use the
9940 profiling-before-prologue variant. If so, issue a sorry. */
9941 if (crtl->profile && flag_fentry != 0)
9942 sorry ("ms_hook_prologue attribute isn%'t compatible "
9943 "with -mfentry for 32-bit");
9945 /* In ix86_asm_output_function_label we emitted:
9946 8b ff movl.s %edi,%edi
9948 8b ec movl.s %esp,%ebp
9950 This matches the hookable function prologue in Win32 API
9951 functions in Microsoft Windows XP Service Pack 2 and newer.
9952 Wine uses this to enable Windows apps to hook the Win32 API
9953 functions provided by Wine.
9955 What that means is that we've already set up the frame pointer. */
9957 if (frame_pointer_needed
9958 && !(crtl->drap_reg && crtl->stack_realign_needed))
9962 /* We've decided to use the frame pointer already set up.
9963 Describe this to the unwinder by pretending that both
9964 push and mov insns happen right here.
9966 Putting the unwind info here at the end of the ms_hook
9967 is done so that we can make absolutely certain we get
9968 the required byte sequence at the start of the function,
9969 rather than relying on an assembler that can produce
9970 the exact encoding required.
9972 However it does mean (in the unpatched case) that we have
9973 a 1 insn window where the asynchronous unwind info is
9974 incorrect. However, if we placed the unwind info at
9975 its correct location we would have incorrect unwind info
9976 in the patched case. Which is probably all moot since
9977 I don't expect Wine generates dwarf2 unwind info for the
9978 system libraries that use this feature. */
9980 insn = emit_insn (gen_blockage ());
9982 push = gen_push (hard_frame_pointer_rtx);
9983 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9985 RTX_FRAME_RELATED_P (push) = 1;
9986 RTX_FRAME_RELATED_P (mov) = 1;
9988 RTX_FRAME_RELATED_P (insn) = 1;
9989 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9990 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9992 /* Note that gen_push incremented m->fs.cfa_offset, even
9993 though we didn't emit the push insn here. */
9994 m->fs.cfa_reg = hard_frame_pointer_rtx;
9995 m->fs.fp_offset = m->fs.cfa_offset;
9996 m->fs.fp_valid = true;
10000 /* The frame pointer is not needed so pop %ebp again.
10001 This leaves us with a pristine state. */
10002 emit_insn (gen_pop (hard_frame_pointer_rtx));
10006 /* The first insn of a function that accepts its static chain on the
10007 stack is to push the register that would be filled in by a direct
10008 call. This insn will be skipped by the trampoline. */
10009 else if (ix86_static_chain_on_stack)
10011 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10012 emit_insn (gen_blockage ());
10014 /* We don't want to interpret this push insn as a register save,
10015 only as a stack adjustment. The real copy of the register as
10016 a save will be done later, if needed. */
10017 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10018 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10019 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10020 RTX_FRAME_RELATED_P (insn) = 1;
10023 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10024 DRAP is needed and stack realignment is really needed after reload. */
10025 if (stack_realign_drap)
10027 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10029 /* Only need to push the parameter pointer reg if it is caller-saved. */
10030 if (!call_used_regs[REGNO (crtl->drap_reg)])
10032 /* Push the argument pointer register. */
10033 insn = emit_insn (gen_push (crtl->drap_reg));
10034 RTX_FRAME_RELATED_P (insn) = 1;
10037 /* Grab the argument pointer. */
10038 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10039 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10040 RTX_FRAME_RELATED_P (insn) = 1;
10041 m->fs.cfa_reg = crtl->drap_reg;
10042 m->fs.cfa_offset = 0;
10044 /* Align the stack. */
10045 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10047 GEN_INT (-align_bytes)));
10048 RTX_FRAME_RELATED_P (insn) = 1;
10050 /* Replicate the return address on the stack so that the return
10051 address can be reached via the (argp - 1) slot. This is needed
10052 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10053 expand_builtin_return_addr, etc. */
10054 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10055 t = gen_frame_mem (Pmode, t);
10056 insn = emit_insn (gen_push (t));
10057 RTX_FRAME_RELATED_P (insn) = 1;
10059 /* For the purposes of frame and register save area addressing,
10060 we've started over with a new frame. */
10061 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10062 m->fs.realigned = true;
10065 if (frame_pointer_needed && !m->fs.fp_valid)
10067 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10068 slower on all targets. Also sdb doesn't like it. */
10069 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10070 RTX_FRAME_RELATED_P (insn) = 1;
10072 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10074 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10075 RTX_FRAME_RELATED_P (insn) = 1;
10077 if (m->fs.cfa_reg == stack_pointer_rtx)
10078 m->fs.cfa_reg = hard_frame_pointer_rtx;
10079 m->fs.fp_offset = m->fs.sp_offset;
10080 m->fs.fp_valid = true;
10084 int_registers_saved = (frame.nregs == 0);
10086 if (!int_registers_saved)
10088 /* If saving registers via PUSH, do so now. */
10089 if (!frame.save_regs_using_mov)
10091 ix86_emit_save_regs ();
10092 int_registers_saved = true;
10093 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10096 /* When using the red zone, we may start saving registers before allocating
10097 the stack frame, saving one cycle of the prologue. However, avoid
10098 doing this if we have to probe the stack; at least on x86_64 the
10099 stack probe can turn into a call that clobbers a red zone location. */
10100 else if (ix86_using_red_zone ()
10101 && (! TARGET_STACK_PROBE
10102 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10104 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10105 int_registers_saved = true;
10109 if (stack_realign_fp)
10111 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10112 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10114 /* The computation of the size of the re-aligned stack frame means
10115 that we must allocate the size of the register save area before
10116 performing the actual alignment. Otherwise we cannot guarantee
10117 that there's enough storage above the realignment point. */
10118 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10119 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10120 GEN_INT (m->fs.sp_offset
10121 - frame.sse_reg_save_offset),
10124 /* Align the stack. */
10125 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10127 GEN_INT (-align_bytes)));
10129 /* For the purposes of register save area addressing, the stack
10130 pointer is no longer valid. As for the value of sp_offset,
10131 see ix86_compute_frame_layout, which we need to match in order
10132 to pass verification of stack_pointer_offset at the end. */
10133 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10134 m->fs.sp_valid = false;
10137 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10139 if (flag_stack_usage_info)
10142 /* We start counting from ARG_POINTER. */
10142 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10144 /* If it was realigned, take into account the fake frame. */
10145 if (stack_realign_drap)
10147 if (ix86_static_chain_on_stack)
10148 stack_size += UNITS_PER_WORD;
10150 if (!call_used_regs[REGNO (crtl->drap_reg)])
10151 stack_size += UNITS_PER_WORD;
10153 /* This over-estimates by 1 minimal-stack-alignment-unit but
10154 mitigates that by counting in the new return address slot. */
10155 current_function_dynamic_stack_size
10156 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10159 current_function_static_stack_size = stack_size;
10162 /* The stack has already been decremented by the instruction calling us
10163 so probe if the size is non-negative to preserve the protection area. */
10164 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10166 /* We expect the registers to be saved when probes are used. */
10167 gcc_assert (int_registers_saved);
10169 if (STACK_CHECK_MOVING_SP)
10171 ix86_adjust_stack_and_probe (allocate);
10176 HOST_WIDE_INT size = allocate;
10178 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10179 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10181 if (TARGET_STACK_PROBE)
10182 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10184 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
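/* Roughly speaking (a sketch, not the literal emitted pattern), the
   moving-sp strategy interleaves allocation with probing, e.g.:
	subq	$4096, %rsp
	orq	$0, (%rsp)
   once per page, whereas ix86_emit_probe_stack_range probes an
   already-sized range at fixed strides before the allocation.  */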
10190 else if (!ix86_target_stack_probe ()
10191 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10193 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10194 GEN_INT (-allocate), -1,
10195 m->fs.cfa_reg == stack_pointer_rtx);
10199 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10201 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10203 bool eax_live = false;
10204 bool r10_live = false;
10207 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10208 if (!TARGET_64BIT_MS_ABI)
10209 eax_live = ix86_eax_live_at_start_p ();
10213 emit_insn (gen_push (eax));
10214 allocate -= UNITS_PER_WORD;
10218 r10 = gen_rtx_REG (Pmode, R10_REG);
10219 emit_insn (gen_push (r10));
10220 allocate -= UNITS_PER_WORD;
10223 emit_move_insn (eax, GEN_INT (allocate));
10224 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10226 /* Use the fact that AX still contains ALLOCATE. */
10227 adjust_stack_insn = (TARGET_64BIT
10228 ? gen_pro_epilogue_adjust_stack_di_sub
10229 : gen_pro_epilogue_adjust_stack_si_sub);
10231 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10232 stack_pointer_rtx, eax));
10234 /* Note that SEH directives need to continue tracking the stack
10235 pointer even after the frame pointer has been set up. */
10236 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10238 if (m->fs.cfa_reg == stack_pointer_rtx)
10239 m->fs.cfa_offset += allocate;
10241 RTX_FRAME_RELATED_P (insn) = 1;
10242 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10243 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10244 plus_constant (stack_pointer_rtx,
10247 m->fs.sp_offset += allocate;
10249 if (r10_live && eax_live)
10251 t = choose_baseaddr (m->fs.sp_offset - allocate);
10252 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10253 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10254 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10256 else if (eax_live || r10_live)
10258 t = choose_baseaddr (m->fs.sp_offset - allocate);
10259 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10262 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10264 /* If we haven't already set up the frame pointer, do so now. */
10265 if (frame_pointer_needed && !m->fs.fp_valid)
10267 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10268 GEN_INT (frame.stack_pointer_offset
10269 - frame.hard_frame_pointer_offset));
10270 insn = emit_insn (insn);
10271 RTX_FRAME_RELATED_P (insn) = 1;
10272 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10274 if (m->fs.cfa_reg == stack_pointer_rtx)
10275 m->fs.cfa_reg = hard_frame_pointer_rtx;
10276 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10277 m->fs.fp_valid = true;
10280 if (!int_registers_saved)
10281 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10282 if (frame.nsseregs)
10283 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10285 pic_reg_used = false;
10286 if (pic_offset_table_rtx
10287 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10290 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10292 if (alt_pic_reg_used != INVALID_REGNUM)
10293 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10295 pic_reg_used = true;
10302 if (ix86_cmodel == CM_LARGE_PIC)
10304 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10305 rtx label = gen_label_rtx ();
10306 emit_label (label);
10307 LABEL_PRESERVE_P (label) = 1;
10308 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10309 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10310 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10311 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10312 pic_offset_table_rtx, tmp_reg));
10315 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10319 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10320 RTX_FRAME_RELATED_P (insn) = 1;
10321 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
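/* On ia32, gen_set_got conventionally expands to something along the
   lines of (a sketch; the exact thunk name varies between GCC
   versions):
	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
   leaving the PIC base address in %ebx.  */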
10325 /* In the pic_reg_used case, make sure that the GOT load isn't deleted
10326 when mcount needs it. Blockage to avoid call movement across mcount
10327 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10329 if (crtl->profile && !flag_fentry && pic_reg_used)
10330 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10332 if (crtl->drap_reg && !crtl->stack_realign_needed)
10334 /* vDRAP is set up, but after reload it turns out stack realignment
10335 isn't necessary; here we emit the prologue to set up DRAP
10336 without the stack realignment adjustment. */
10337 t = choose_baseaddr (0);
10338 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10341 /* Prevent instructions from being scheduled into the register save push
10342 sequence when access to the red-zone area is done through the frame pointer.
10343 The offset between the frame pointer and the stack pointer is calculated
10344 relative to the value of the stack pointer at the end of the function
10345 prologue, and moving instructions that access the red-zone area via the
10346 frame pointer inside the push sequence violates this assumption. */
10347 if (frame_pointer_needed && frame.red_zone_size)
10348 emit_insn (gen_memory_blockage ());
10350 /* Emit cld instruction if stringops are used in the function. */
10351 if (TARGET_CLD && ix86_current_function_needs_cld)
10352 emit_insn (gen_cld ());
10354 /* SEH requires that the prologue end within 256 bytes of the start of
10355 the function. Prevent instruction schedules that would extend that.
10356 Further, prevent alloca modifications to the stack pointer from being
10357 combined with prologue modifications. */
10359 emit_insn (gen_prologue_use (stack_pointer_rtx));
10362 /* Emit code to restore REG using a POP insn. */
10365 ix86_emit_restore_reg_using_pop (rtx reg)
10367 struct machine_function *m = cfun->machine;
10368 rtx insn = emit_insn (gen_pop (reg));
10370 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10371 m->fs.sp_offset -= UNITS_PER_WORD;
10373 if (m->fs.cfa_reg == crtl->drap_reg
10374 && REGNO (reg) == REGNO (crtl->drap_reg))
10376 /* Previously we'd represented the CFA as an expression
10377 like *(%ebp - 8). We've just popped that value from
10378 the stack, which means we need to reset the CFA to
10379 the drap register. This will remain until we restore
10380 the stack pointer. */
10381 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10382 RTX_FRAME_RELATED_P (insn) = 1;
10384 /* This means that the DRAP register is valid for addressing too. */
10385 m->fs.drap_valid = true;
10389 if (m->fs.cfa_reg == stack_pointer_rtx)
10391 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10392 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10393 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10394 RTX_FRAME_RELATED_P (insn) = 1;
10396 m->fs.cfa_offset -= UNITS_PER_WORD;
10399 /* When the frame pointer is the CFA, and we pop it, we are
10400 swapping back to the stack pointer as the CFA. This happens
10401 for stack frames that don't allocate other data, so we assume
10402 the stack pointer is now pointing at the return address, i.e.
10403 the function entry state, which makes the offset one word. */
10404 if (reg == hard_frame_pointer_rtx)
10406 m->fs.fp_valid = false;
10407 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10409 m->fs.cfa_reg = stack_pointer_rtx;
10410 m->fs.cfa_offset -= UNITS_PER_WORD;
10412 add_reg_note (insn, REG_CFA_DEF_CFA,
10413 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10414 GEN_INT (m->fs.cfa_offset)));
10415 RTX_FRAME_RELATED_P (insn) = 1;
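/* Worked example with hypothetical 32-bit values: if the CFA was
   %ebp + 8 and we pop %ebp, %esp ends up pointing at the return
   address, so the CFA is re-expressed as %esp + 4; that is what the
   cfa_offset adjustment and the REG_CFA_DEF_CFA note above record.  */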
10420 /* Emit code to restore saved registers using POP insns. */
10423 ix86_emit_restore_regs_using_pop (void)
10425 unsigned int regno;
10427 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10428 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10429 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10432 /* Emit code and notes for the LEAVE instruction. */
10435 ix86_emit_leave (void)
10437 struct machine_function *m = cfun->machine;
10438 rtx insn = emit_insn (ix86_gen_leave ());
10440 ix86_add_queued_cfa_restore_notes (insn);
10442 gcc_assert (m->fs.fp_valid);
10443 m->fs.sp_valid = true;
10444 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10445 m->fs.fp_valid = false;
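/* 'leave' is equivalent to 'mov %ebp, %esp' followed by 'pop %ebp':
   the move makes sp_offset equal fp_offset, and the pop then removes
   one more word, hence fp_offset - UNITS_PER_WORD above.  */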
10447 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10449 m->fs.cfa_reg = stack_pointer_rtx;
10450 m->fs.cfa_offset = m->fs.sp_offset;
10452 add_reg_note (insn, REG_CFA_DEF_CFA,
10453 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10454 RTX_FRAME_RELATED_P (insn) = 1;
10455 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10460 /* Emit code to restore saved registers using MOV insns.
10461 First register is restored from CFA - CFA_OFFSET. */
10463 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10464 bool maybe_eh_return)
10466 struct machine_function *m = cfun->machine;
10467 unsigned int regno;
10469 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10470 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10472 rtx reg = gen_rtx_REG (Pmode, regno);
10475 mem = choose_baseaddr (cfa_offset);
10476 mem = gen_frame_mem (Pmode, mem);
10477 insn = emit_move_insn (reg, mem);
10479 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10481 /* Previously we'd represented the CFA as an expression
10482 like *(%ebp - 8). We've just reloaded that value from
10483 the stack, which means we need to reset the CFA to
10484 the drap register. This will remain until we restore
10485 the stack pointer. */
10486 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10487 RTX_FRAME_RELATED_P (insn) = 1;
10489 /* This means that the DRAP register is valid for addressing. */
10490 m->fs.drap_valid = true;
10493 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10495 cfa_offset -= UNITS_PER_WORD;
10499 /* Emit code to restore saved SSE registers using MOV insns.
10500 First register is restored from CFA - CFA_OFFSET. */
10502 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10503 bool maybe_eh_return)
10505 unsigned int regno;
10507 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10508 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10510 rtx reg = gen_rtx_REG (V4SFmode, regno);
10513 mem = choose_baseaddr (cfa_offset);
10514 mem = gen_rtx_MEM (V4SFmode, mem);
10515 set_mem_align (mem, 128);
10516 emit_move_insn (reg, mem);
10518 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
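/* Because set_mem_align marked the slot as 128-bit aligned, the move
   above may be emitted as an aligned load (movaps rather than
   movups), assuming the prologue really did align the save area.  */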
10524 /* Restore function stack, frame, and registers. */
10527 ix86_expand_epilogue (int style)
10529 struct machine_function *m = cfun->machine;
10530 struct machine_frame_state frame_state_save = m->fs;
10531 struct ix86_frame frame;
10532 bool restore_regs_via_mov;
10535 ix86_finalize_stack_realign_flags ();
10536 ix86_compute_frame_layout (&frame);
10538 m->fs.sp_valid = (!frame_pointer_needed
10539 || (current_function_sp_is_unchanging
10540 && !stack_realign_fp));
10541 gcc_assert (!m->fs.sp_valid
10542 || m->fs.sp_offset == frame.stack_pointer_offset);
10544 /* The FP must be valid if the frame pointer is present. */
10545 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10546 gcc_assert (!m->fs.fp_valid
10547 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10549 /* We must have *some* valid pointer to the stack frame. */
10550 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10552 /* The DRAP is never valid at this point. */
10553 gcc_assert (!m->fs.drap_valid);
10555 /* See the comment about red zone and frame
10556 pointer usage in ix86_expand_prologue. */
10557 if (frame_pointer_needed && frame.red_zone_size)
10558 emit_insn (gen_memory_blockage ());
10560 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10561 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10563 /* Determine the CFA offset of the end of the red-zone. */
10564 m->fs.red_zone_offset = 0;
10565 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10567 /* The red-zone begins below the return address. */
10568 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10570 /* When the register save area is in the aligned portion of
10571 the stack, determine the maximum runtime displacement that
10572 matches up with the aligned frame. */
10573 if (stack_realign_drap)
10574 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10578 /* Special care must be taken for the normal return case of a function
10579 using eh_return: the eax and edx registers are marked as saved, but
10580 not restored along this path. Adjust the save location to match. */
10581 if (crtl->calls_eh_return && style != 2)
10582 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10584 /* EH_RETURN requires the use of moves to function properly. */
10585 if (crtl->calls_eh_return)
10586 restore_regs_via_mov = true;
10587 /* SEH requires the use of pops to identify the epilogue. */
10588 else if (TARGET_SEH)
10589 restore_regs_via_mov = false;
10590 /* If we're only restoring one register and sp is not valid, then
10591 use a move instruction to restore the register, since it's
10592 less work than reloading sp and popping the register. */
10593 else if (!m->fs.sp_valid && frame.nregs <= 1)
10594 restore_regs_via_mov = true;
10595 else if (TARGET_EPILOGUE_USING_MOVE
10596 && cfun->machine->use_fast_prologue_epilogue
10597 && (frame.nregs > 1
10598 || m->fs.sp_offset != frame.reg_save_offset))
10599 restore_regs_via_mov = true;
10600 else if (frame_pointer_needed
10602 && m->fs.sp_offset != frame.reg_save_offset)
10603 restore_regs_via_mov = true;
10604 else if (frame_pointer_needed
10605 && TARGET_USE_LEAVE
10606 && cfun->machine->use_fast_prologue_epilogue
10607 && frame.nregs == 1)
10608 restore_regs_via_mov = true;
10610 restore_regs_via_mov = false;
10612 if (restore_regs_via_mov || frame.nsseregs)
10614 /* Ensure that the entire register save area is addressable via
10615 the stack pointer, if we will restore via sp. */
10617 && m->fs.sp_offset > 0x7fffffff
10618 && !(m->fs.fp_valid || m->fs.drap_valid)
10619 && (frame.nsseregs + frame.nregs) != 0)
10621 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10622 GEN_INT (m->fs.sp_offset
10623 - frame.sse_reg_save_offset),
10625 m->fs.cfa_reg == stack_pointer_rtx);
10629 /* If there are any SSE registers to restore, then we have to do it
10630 via moves, since there's obviously no pop for SSE regs. */
10631 if (frame.nsseregs)
10632 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10635 if (restore_regs_via_mov)
10640 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10642 /* eh_return epilogues need %ecx added to the stack pointer. */
10645 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10647 /* Stack realignment doesn't work with eh_return. */
10648 gcc_assert (!stack_realign_drap);
10649 /* Neither do regparm nested functions. */
10650 gcc_assert (!ix86_static_chain_on_stack);
10652 if (frame_pointer_needed)
10654 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10655 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10656 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10658 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10659 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10661 /* Note that we use SA as a temporary CFA, as the return
10662 address is at the proper place relative to it. We
10663 pretend this happens at the FP restore insn because
10664 prior to this insn the FP would be stored at the wrong
10665 offset relative to SA, and after this insn we have no
10666 other reasonable register to use for the CFA. We don't
10667 bother resetting the CFA to the SP for the duration of
10668 the return insn. */
10669 add_reg_note (insn, REG_CFA_DEF_CFA,
10670 plus_constant (sa, UNITS_PER_WORD));
10671 ix86_add_queued_cfa_restore_notes (insn);
10672 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10673 RTX_FRAME_RELATED_P (insn) = 1;
10675 m->fs.cfa_reg = sa;
10676 m->fs.cfa_offset = UNITS_PER_WORD;
10677 m->fs.fp_valid = false;
10679 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10680 const0_rtx, style, false);
10684 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10685 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10686 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10687 ix86_add_queued_cfa_restore_notes (insn);
10689 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10690 if (m->fs.cfa_offset != UNITS_PER_WORD)
10692 m->fs.cfa_offset = UNITS_PER_WORD;
10693 add_reg_note (insn, REG_CFA_DEF_CFA,
10694 plus_constant (stack_pointer_rtx,
10696 RTX_FRAME_RELATED_P (insn) = 1;
10699 m->fs.sp_offset = UNITS_PER_WORD;
10700 m->fs.sp_valid = true;
10705 /* SEH requires that the function end with (1) a stack adjustment
10706 if necessary, (2) a sequence of pops, and (3) a return or
10707 jump instruction. Prevent insns from the function body from
10708 being scheduled into this sequence. */
10711 /* Prevent a catch region from being adjacent to the standard
10712 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda nor
10713 several other flags that would be interesting to test are
10715 if (flag_non_call_exceptions)
10716 emit_insn (gen_nops (const1_rtx));
10718 emit_insn (gen_blockage ());
10721 /* The first step is to deallocate the stack frame so that we can
10722 pop the registers. */
10723 if (!m->fs.sp_valid)
10725 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10726 GEN_INT (m->fs.fp_offset
10727 - frame.reg_save_offset),
10730 else if (m->fs.sp_offset != frame.reg_save_offset)
10732 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10733 GEN_INT (m->fs.sp_offset
10734 - frame.reg_save_offset),
10736 m->fs.cfa_reg == stack_pointer_rtx);
10739 ix86_emit_restore_regs_using_pop ();
10742 /* If we used a frame pointer and haven't already got rid of it,
10744 if (m->fs.fp_valid)
10746 /* If the stack pointer is valid and pointing at the frame
10747 pointer store address, then we only need a pop. */
10748 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10749 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10750 /* The 'leave' insn results in shorter dependency chains on CPUs that are
10751 able to grok it fast. */
10752 else if (TARGET_USE_LEAVE
10753 || optimize_function_for_size_p (cfun)
10754 || !cfun->machine->use_fast_prologue_epilogue)
10755 ix86_emit_leave ();
10758 pro_epilogue_adjust_stack (stack_pointer_rtx,
10759 hard_frame_pointer_rtx,
10760 const0_rtx, style, !using_drap);
10761 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10767 int param_ptr_offset = UNITS_PER_WORD;
10770 gcc_assert (stack_realign_drap);
10772 if (ix86_static_chain_on_stack)
10773 param_ptr_offset += UNITS_PER_WORD;
10774 if (!call_used_regs[REGNO (crtl->drap_reg)])
10775 param_ptr_offset += UNITS_PER_WORD;
10777 insn = emit_insn (gen_rtx_SET
10778 (VOIDmode, stack_pointer_rtx,
10779 gen_rtx_PLUS (Pmode,
10781 GEN_INT (-param_ptr_offset))));
10782 m->fs.cfa_reg = stack_pointer_rtx;
10783 m->fs.cfa_offset = param_ptr_offset;
10784 m->fs.sp_offset = param_ptr_offset;
10785 m->fs.realigned = false;
10787 add_reg_note (insn, REG_CFA_DEF_CFA,
10788 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10789 GEN_INT (param_ptr_offset)));
10790 RTX_FRAME_RELATED_P (insn) = 1;
10792 if (!call_used_regs[REGNO (crtl->drap_reg)])
10793 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10796 /* At this point the stack pointer must be valid, and we must have
10797 restored all of the registers. We may not have deallocated the
10798 entire stack frame. We've delayed this until now because it may
10799 be possible to merge the local stack deallocation with the
10800 deallocation forced by ix86_static_chain_on_stack. */
10801 gcc_assert (m->fs.sp_valid);
10802 gcc_assert (!m->fs.fp_valid);
10803 gcc_assert (!m->fs.realigned);
10804 if (m->fs.sp_offset != UNITS_PER_WORD)
10806 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10807 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10811 ix86_add_queued_cfa_restore_notes (get_last_insn ());
10813 /* Sibcall epilogues don't want a return instruction. */
10816 m->fs = frame_state_save;
10820 /* Emit vzeroupper if needed. */
10821 if (TARGET_VZEROUPPER
10822 && !TREE_THIS_VOLATILE (cfun->decl)
10823 && !cfun->machine->caller_return_avx256_p)
10824 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10826 if (crtl->args.pops_args && crtl->args.size)
10828 rtx popc = GEN_INT (crtl->args.pops_args);
10830 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
10831 address, do an explicit add, and jump indirectly to the caller. */
10833 if (crtl->args.pops_args >= 65536)
10835 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10838 /* There is no "pascal" calling convention in any 64bit ABI. */
10839 gcc_assert (!TARGET_64BIT);
10841 insn = emit_insn (gen_pop (ecx));
10842 m->fs.cfa_offset -= UNITS_PER_WORD;
10843 m->fs.sp_offset -= UNITS_PER_WORD;
10845 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10846 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10847 add_reg_note (insn, REG_CFA_REGISTER,
10848 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10849 RTX_FRAME_RELATED_P (insn) = 1;
10851 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10853 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10856 emit_jump_insn (gen_simple_return_pop_internal (popc));
10859 emit_jump_insn (gen_simple_return_internal ());
10861 /* Restore the state back to the state from the prologue,
10862 so that it's correct for the next epilogue. */
10863 m->fs = frame_state_save;
10866 /* Reset from the function's potential modifications. */
10869 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10870 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10872 if (pic_offset_table_rtx)
10873 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10875 /* Mach-O doesn't support labels at the end of objects, so if
10876 it looks like we might want one, insert a NOP. */
10878 rtx insn = get_last_insn ();
10881 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10882 insn = PREV_INSN (insn);
10886 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10887 fputs ("\tnop\n", file);
10893 /* Return a scratch register to use in the split stack prologue. The
10894 split stack prologue is used for -fsplit-stack. It consists of the
10895 first instructions in the function, even before the regular prologue.
10896 The scratch register can be any caller-saved register which is not
10897 used for parameters or for the static chain. */
10899 static unsigned int
10900 split_stack_prologue_scratch_regno (void)
10909 is_fastcall = (lookup_attribute ("fastcall",
10910 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10912 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10916 if (DECL_STATIC_CHAIN (cfun->decl))
10918 sorry ("-fsplit-stack does not support fastcall with "
10919 "nested function");
10920 return INVALID_REGNUM;
10924 else if (regparm < 3)
10926 if (!DECL_STATIC_CHAIN (cfun->decl))
10932 sorry ("-fsplit-stack does not support 2 register "
10933 " parameters for a nested function");
10934 return INVALID_REGNUM;
10941 /* FIXME: We could make this work by pushing a register
10942 around the addition and comparison. */
10943 sorry ("-fsplit-stack does not support 3 register parameters");
10944 return INVALID_REGNUM;
10949 /* A SYMBOL_REF for the function which allocates new stack space for
10952 static GTY(()) rtx split_stack_fn;
10954 /* A SYMBOL_REF for the more-stack function when using the large
10957 static GTY(()) rtx split_stack_fn_large;
10959 /* Handle -fsplit-stack. These are the first instructions in the
10960 function, even before the regular prologue. */
10963 ix86_expand_split_stack_prologue (void)
10965 struct ix86_frame frame;
10966 HOST_WIDE_INT allocate;
10967 unsigned HOST_WIDE_INT args_size;
10968 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10969 rtx scratch_reg = NULL_RTX;
10970 rtx varargs_label = NULL_RTX;
10973 gcc_assert (flag_split_stack && reload_completed);
10975 ix86_finalize_stack_realign_flags ();
10976 ix86_compute_frame_layout (&frame);
10977 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10979 /* This is the label we will branch to if we have enough stack
10980 space. We expect the basic block reordering pass to reverse this
10981 branch if optimizing, so that we branch in the unlikely case. */
10982 label = gen_label_rtx ();
10984 /* We need to compare the stack pointer minus the frame size with
10985 the stack boundary in the TCB. The stack boundary always gives
10986 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10987 can compare directly. Otherwise we need to do an addition. */
10989 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10990 UNSPEC_STACK_CHECK);
10991 limit = gen_rtx_CONST (Pmode, limit);
10992 limit = gen_rtx_MEM (Pmode, limit);
10993 if (allocate < SPLIT_STACK_AVAILABLE)
10994 current = stack_pointer_rtx;
10997 unsigned int scratch_regno;
11000 /* We need a scratch register to hold the stack pointer minus
11001 the required frame size. Since this is the very start of the
11002 function, the scratch register can be any caller-saved
11003 register which is not used for parameters. */
11004 offset = GEN_INT (- allocate);
11005 scratch_regno = split_stack_prologue_scratch_regno ();
11006 if (scratch_regno == INVALID_REGNUM)
11008 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11009 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11011 /* We don't use ix86_gen_add3 in this case because it will
11012 want to split to lea, but when not optimizing the insn
11013 will not be split after this point. */
11014 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11015 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11020 emit_move_insn (scratch_reg, offset);
11021 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11022 stack_pointer_rtx));
11024 current = scratch_reg;
11027 ix86_expand_branch (GEU, current, limit, label);
11028 jump_insn = get_last_insn ();
11029 JUMP_LABEL (jump_insn) = label;
11031 /* Mark the jump as very likely to be taken. */
11032 add_reg_note (jump_insn, REG_BR_PROB,
11033 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
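/* REG_BR_PROB_BASE is 10000, so the note above records a taken
   probability of (10000 - 100) / 10000 = 99%, i.e. we almost always
   expect enough stack to be available and __morestack to be
   skipped.  */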
11035 if (split_stack_fn == NULL_RTX)
11036 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11037 fn = split_stack_fn;
11039 /* Get more stack space. We pass in the desired stack space and the
11040 size of the arguments to copy to the new stack. In 32-bit mode
11041 we push the parameters; __morestack will return on a new stack
11042 anyhow. In 64-bit mode we pass the parameters in r10 and
11044 allocate_rtx = GEN_INT (allocate);
11045 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11046 call_fusage = NULL_RTX;
11051 reg10 = gen_rtx_REG (Pmode, R10_REG);
11052 reg11 = gen_rtx_REG (Pmode, R11_REG);
11054 /* If this function uses a static chain, it will be in %r10.
11055 Preserve it across the call to __morestack. */
11056 if (DECL_STATIC_CHAIN (cfun->decl))
11060 rax = gen_rtx_REG (Pmode, AX_REG);
11061 emit_move_insn (rax, reg10);
11062 use_reg (&call_fusage, rax);
11065 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11067 HOST_WIDE_INT argval;
11069 /* When using the large model we need to load the address
11070 into a register, and we've run out of registers. So we
11071 switch to a different calling convention, and we call a
11072 different function: __morestack_large_model. We pass the
11073 argument size in the upper 32 bits of r10 and pass the
11074 frame size in the lower 32 bits. */
11075 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11076 gcc_assert ((args_size & 0xffffffff) == args_size);
11078 if (split_stack_fn_large == NULL_RTX)
11079 split_stack_fn_large =
11080 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11082 if (ix86_cmodel == CM_LARGE_PIC)
11086 label = gen_label_rtx ();
11087 emit_label (label);
11088 LABEL_PRESERVE_P (label) = 1;
11089 emit_insn (gen_set_rip_rex64 (reg10, label));
11090 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11091 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11092 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11094 x = gen_rtx_CONST (Pmode, x);
11095 emit_move_insn (reg11, x);
11096 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11097 x = gen_const_mem (Pmode, x);
11098 emit_move_insn (reg11, x);
11101 emit_move_insn (reg11, split_stack_fn_large);
11105 argval = ((args_size << 16) << 16) + allocate;
11106 emit_move_insn (reg10, GEN_INT (argval));
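/* Worked example of the packing above, with hypothetical sizes:
   args_size == 0x18 and allocate == 0x120 give
   argval == (0x18 << 32) + 0x120 == 0x1800000120, so
   __morestack_large_model finds the argument size in the upper and
   the frame size in the lower 32 bits of %r10.  */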
11110 emit_move_insn (reg10, allocate_rtx);
11111 emit_move_insn (reg11, GEN_INT (args_size));
11112 use_reg (&call_fusage, reg11);
11115 use_reg (&call_fusage, reg10);
11119 emit_insn (gen_push (GEN_INT (args_size)));
11120 emit_insn (gen_push (allocate_rtx));
11122 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11123 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11125 add_function_usage_to (call_insn, call_fusage);
11127 /* In order to make call/return prediction work right, we now need
11128 to execute a return instruction. See
11129 libgcc/config/i386/morestack.S for the details on how this works.
11131 For flow purposes gcc must not see this as a return
11132 instruction--we need control flow to continue at the subsequent
11133 label. Therefore, we use an unspec. */
11134 gcc_assert (crtl->args.pops_args < 65536);
11135 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11137 /* If we are in 64-bit mode and this function uses a static chain,
11138 we saved %r10 in %rax before calling __morestack. */
11139 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11140 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11141 gen_rtx_REG (Pmode, AX_REG));
11143 /* If this function calls va_start, we need to store a pointer to
11144 the arguments on the old stack, because they may not have been
11145 all copied to the new stack. At this point the old stack can be
11146 found at the frame pointer value used by __morestack, because
11147 __morestack has set that up before calling back to us. Here we
11148 store that pointer in a scratch register, and in
11149 ix86_expand_prologue we store the scratch register in a stack
11151 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11153 unsigned int scratch_regno;
11157 scratch_regno = split_stack_prologue_scratch_regno ();
11158 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11159 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11163 return address within this function
11164 return address of caller of this function
11166 So we add three words to get to the stack arguments.
11170 return address within this function
11171 first argument to __morestack
11172 second argument to __morestack
11173 return address of caller of this function
11175 So we add five words to get to the stack arguments.
11177 words = TARGET_64BIT ? 3 : 5;
11178 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11179 gen_rtx_PLUS (Pmode, frame_reg,
11180 GEN_INT (words * UNITS_PER_WORD))));
11182 varargs_label = gen_label_rtx ();
11183 emit_jump_insn (gen_jump (varargs_label));
11184 JUMP_LABEL (get_last_insn ()) = varargs_label;
11189 emit_label (label);
11190 LABEL_NUSES (label) = 1;
11192 /* If this function calls va_start, we now have to set the scratch
11193 register for the case where we do not call __morestack. In this
11194 case we need to set it based on the stack pointer. */
11195 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11197 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11198 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11199 GEN_INT (UNITS_PER_WORD))));
11201 emit_label (varargs_label);
11202 LABEL_NUSES (varargs_label) = 1;
11206 /* We may have to tell the dataflow pass that the split stack prologue
11207 is initializing a scratch register. */
11210 ix86_live_on_entry (bitmap regs)
11212 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11214 gcc_assert (flag_split_stack);
11215 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11219 /* Determine if OP is a suitable SUBREG RTX for an address. */
11222 ix86_address_subreg_operand (rtx op)
11224 enum machine_mode mode;
11229 mode = GET_MODE (op);
11231 if (GET_MODE_CLASS (mode) != MODE_INT)
11234 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11235 failures when the register is one word out of a two word structure. */
11236 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11239 /* Allow only SUBREGs of non-eliminable hard registers. */
11240 return register_no_elim_operand (op, mode);
11243 /* Extract the parts of an RTL expression that is a valid memory address
11244 for an instruction. Return 0 if the structure of the address is
11245 grossly off. Return -1 if the address contains ASHIFT, so it is not
11246 strictly valid, but still used for computing the length of the lea instruction. */
11249 ix86_decompose_address (rtx addr, struct ix86_address *out)
11251 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11252 rtx base_reg, index_reg;
11253 HOST_WIDE_INT scale = 1;
11254 rtx scale_rtx = NULL_RTX;
11257 enum ix86_address_seg seg = SEG_DEFAULT;
11259 /* Allow zero-extended SImode addresses;
11260 they will be emitted with the addr32 prefix. */
11261 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11263 if (GET_CODE (addr) == ZERO_EXTEND
11264 && GET_MODE (XEXP (addr, 0)) == SImode)
11265 addr = XEXP (addr, 0);
11266 else if (GET_CODE (addr) == AND
11267 && const_32bit_mask (XEXP (addr, 1), DImode))
11269 addr = XEXP (addr, 0);
11271 /* Strip subreg. */
11272 if (GET_CODE (addr) == SUBREG
11273 && GET_MODE (SUBREG_REG (addr)) == SImode)
11274 addr = SUBREG_REG (addr);
11280 else if (GET_CODE (addr) == SUBREG)
11282 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11287 else if (GET_CODE (addr) == PLUS)
11289 rtx addends[4], op;
11297 addends[n++] = XEXP (op, 1);
11300 while (GET_CODE (op) == PLUS);
11305 for (i = n; i >= 0; --i)
11308 switch (GET_CODE (op))
11313 index = XEXP (op, 0);
11314 scale_rtx = XEXP (op, 1);
11320 index = XEXP (op, 0);
11321 tmp = XEXP (op, 1);
11322 if (!CONST_INT_P (tmp))
11324 scale = INTVAL (tmp);
11325 if ((unsigned HOST_WIDE_INT) scale > 3)
11327 scale = 1 << scale;
11331 if (XINT (op, 1) == UNSPEC_TP
11332 && TARGET_TLS_DIRECT_SEG_REFS
11333 && seg == SEG_DEFAULT)
11334 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11340 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11367 else if (GET_CODE (addr) == MULT)
11369 index = XEXP (addr, 0); /* index*scale */
11370 scale_rtx = XEXP (addr, 1);
11372 else if (GET_CODE (addr) == ASHIFT)
11374 /* We're called for lea too, which implements ashift on occasion. */
11375 index = XEXP (addr, 0);
11376 tmp = XEXP (addr, 1);
11377 if (!CONST_INT_P (tmp))
11379 scale = INTVAL (tmp);
11380 if ((unsigned HOST_WIDE_INT) scale > 3)
11382 scale = 1 << scale;
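/* E.g. (ashift reg 3) is handled here as reg * 8; shift counts of
   0 through 3 map to the scales 1, 2, 4 and 8 that a SIB byte can
   encode.  */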
11386 disp = addr; /* displacement */
11392 else if (GET_CODE (index) == SUBREG
11393 && ix86_address_subreg_operand (SUBREG_REG (index)))
11399 /* Extract the integral value of scale. */
11402 if (!CONST_INT_P (scale_rtx))
11404 scale = INTVAL (scale_rtx);
11407 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11408 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11410 /* Avoid useless 0 displacement. */
11411 if (disp == const0_rtx && (base || index))
11414 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11415 if (base_reg && index_reg && scale == 1
11416 && (index_reg == arg_pointer_rtx
11417 || index_reg == frame_pointer_rtx
11418 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11421 tmp = base, base = index, index = tmp;
11422 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11425 /* Special case: %ebp cannot be encoded as a base without a displacement.
11429 && (base_reg == hard_frame_pointer_rtx
11430 || base_reg == frame_pointer_rtx
11431 || base_reg == arg_pointer_rtx
11432 || (REG_P (base_reg)
11433 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11434 || REGNO (base_reg) == R13_REG))))
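/* This restriction comes from the ModR/M encoding: with mod == 00 the
   bit pattern that would name %ebp (or %r13 under REX) as a plain
   base is repurposed for disp32 (or RIP-relative) addressing, so such
   bases are only encodable with an explicit displacement, e.g.
   0(%ebp).  */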
11437 /* Special case: on K6, [%esi] forces the instruction to be vector
11438 decoded. Avoid this by transforming it to [%esi+0].
11439 Reload calls address legitimization without cfun defined, so we need
11440 to test cfun for being non-NULL. */
11441 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11442 && base_reg && !index_reg && !disp
11443 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11446 /* Special case: encode reg+reg instead of reg*2. */
11447 if (!base && index && scale == 2)
11448 base = index, base_reg = index_reg, scale = 1;
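/* E.g. emit (%eax,%eax) rather than (,%eax,2): an index without a
   base forces a four-byte zero displacement into the encoding, so
   the reg+reg form is shorter.  */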
11450 /* Special case: scaling cannot be encoded without base or displacement. */
11451 if (!base && !disp && index && scale != 1)
11455 out->index = index;
11457 out->scale = scale;
11463 /* Return cost of the memory address x.
11464 For i386, it is better to use a complex address than let gcc copy
11465 the address into a reg and make a new pseudo. But not if the address
11466 requires two regs - that would mean more pseudos with longer
11469 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11471 struct ix86_address parts;
11473 int ok = ix86_decompose_address (x, &parts);
11477 if (parts.base && GET_CODE (parts.base) == SUBREG)
11478 parts.base = SUBREG_REG (parts.base);
11479 if (parts.index && GET_CODE (parts.index) == SUBREG)
11480 parts.index = SUBREG_REG (parts.index);
11482 /* Attempt to minimize number of registers in the address. */
11484 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11486 && (!REG_P (parts.index)
11487 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11491 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11493 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11494 && parts.base != parts.index)
11497 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11498 since its predecode logic can't detect the length of instructions
11499 and it degenerates to vector decoding. Increase the cost of such
11500 addresses here. The penalty is at least 2 cycles. It may be worthwhile
11501 to split such addresses or even refuse such addresses at all.
11503 The following addressing modes are affected:
11508 The first and last case may be avoidable by explicitly coding the zero in
11509 the memory address, but I don't have an AMD-K6 machine handy to check this
11513 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11514 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11515 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11521 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11522 this is used to form addresses to local data when -fPIC is in
11526 darwin_local_data_pic (rtx disp)
11528 return (GET_CODE (disp) == UNSPEC
11529 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11532 /* Determine if a given RTX is a valid constant. We already know this
11533 satisfies CONSTANT_P. */
11536 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11538 switch (GET_CODE (x))
11543 if (GET_CODE (x) == PLUS)
11545 if (!CONST_INT_P (XEXP (x, 1)))
11550 if (TARGET_MACHO && darwin_local_data_pic (x))
11553 /* Only some unspecs are valid as "constants". */
11554 if (GET_CODE (x) == UNSPEC)
11555 switch (XINT (x, 1))
11558 case UNSPEC_GOTOFF:
11559 case UNSPEC_PLTOFF:
11560 return TARGET_64BIT;
11562 case UNSPEC_NTPOFF:
11563 x = XVECEXP (x, 0, 0);
11564 return (GET_CODE (x) == SYMBOL_REF
11565 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11566 case UNSPEC_DTPOFF:
11567 x = XVECEXP (x, 0, 0);
11568 return (GET_CODE (x) == SYMBOL_REF
11569 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11574 /* We must have drilled down to a symbol. */
11575 if (GET_CODE (x) == LABEL_REF)
11577 if (GET_CODE (x) != SYMBOL_REF)
11582 /* TLS symbols are never valid. */
11583 if (SYMBOL_REF_TLS_MODEL (x))
11586 /* DLLIMPORT symbols are never valid. */
11587 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11588 && SYMBOL_REF_DLLIMPORT_P (x))
11592 /* mdynamic-no-pic */
11593 if (MACHO_DYNAMIC_NO_PIC_P)
11594 return machopic_symbol_defined_p (x);
11599 if (GET_MODE (x) == TImode
11600 && x != CONST0_RTX (TImode)
11606 if (!standard_sse_constant_p (x))
11613 /* Otherwise we handle everything else in the move patterns. */
11617 /* Determine if it's legal to put X into the constant pool. This
11618 is not possible for the address of thread-local symbols, which
11619 is checked above. */
11622 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11624 /* We can always put integral constants and vectors in memory. */
11625 switch (GET_CODE (x))
11635 return !ix86_legitimate_constant_p (mode, x);
11639 /* Nonzero if the constant value X is a legitimate general operand
11640 when generating PIC code. It is given that flag_pic is on and
11641 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11644 legitimate_pic_operand_p (rtx x)
11648 switch (GET_CODE (x))
11651 inner = XEXP (x, 0);
11652 if (GET_CODE (inner) == PLUS
11653 && CONST_INT_P (XEXP (inner, 1)))
11654 inner = XEXP (inner, 0);
11656 /* Only some unspecs are valid as "constants". */
11657 if (GET_CODE (inner) == UNSPEC)
11658 switch (XINT (inner, 1))
11661 case UNSPEC_GOTOFF:
11662 case UNSPEC_PLTOFF:
11663 return TARGET_64BIT;
11665 x = XVECEXP (inner, 0, 0);
11666 return (GET_CODE (x) == SYMBOL_REF
11667 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11668 case UNSPEC_MACHOPIC_OFFSET:
11669 return legitimate_pic_address_disp_p (x);
11677 return legitimate_pic_address_disp_p (x);
11684 /* Determine if a given CONST RTX is a valid memory displacement
11688 legitimate_pic_address_disp_p (rtx disp)
11692 /* In 64bit mode we can allow direct addresses of symbols and labels
11693 when they are not dynamic symbols. */
11696 rtx op0 = disp, op1;
11698 switch (GET_CODE (disp))
11704 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11706 op0 = XEXP (XEXP (disp, 0), 0);
11707 op1 = XEXP (XEXP (disp, 0), 1);
11708 if (!CONST_INT_P (op1)
11709 || INTVAL (op1) >= 16*1024*1024
11710 || INTVAL (op1) < -16*1024*1024)
11712 if (GET_CODE (op0) == LABEL_REF)
11714 if (GET_CODE (op0) != SYMBOL_REF)
11719 /* TLS references should always be enclosed in UNSPEC. */
11720 if (SYMBOL_REF_TLS_MODEL (op0))
11722 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11723 && ix86_cmodel != CM_LARGE_PIC)
11731 if (GET_CODE (disp) != CONST)
11733 disp = XEXP (disp, 0);
11737 /* It is unsafe to allow PLUS expressions here; this limits the allowed
11738 distance of GOT table references. We should not need these anyway. */
11739 if (GET_CODE (disp) != UNSPEC
11740 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11741 && XINT (disp, 1) != UNSPEC_GOTOFF
11742 && XINT (disp, 1) != UNSPEC_PCREL
11743 && XINT (disp, 1) != UNSPEC_PLTOFF))
11746 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11747 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11753 if (GET_CODE (disp) == PLUS)
11755 if (!CONST_INT_P (XEXP (disp, 1)))
11757 disp = XEXP (disp, 0);
11761 if (TARGET_MACHO && darwin_local_data_pic (disp))
11764 if (GET_CODE (disp) != UNSPEC)
11767 switch (XINT (disp, 1))
11772 /* We need to check for both symbols and labels because VxWorks loads
11773 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11775 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11776 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11777 case UNSPEC_GOTOFF:
11778 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11779 While the ABI also specifies a 32bit relocation, we don't produce it in
11780 the small PIC model at all. */
11781 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11782 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11784 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11786 case UNSPEC_GOTTPOFF:
11787 case UNSPEC_GOTNTPOFF:
11788 case UNSPEC_INDNTPOFF:
11791 disp = XVECEXP (disp, 0, 0);
11792 return (GET_CODE (disp) == SYMBOL_REF
11793 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11794 case UNSPEC_NTPOFF:
11795 disp = XVECEXP (disp, 0, 0);
11796 return (GET_CODE (disp) == SYMBOL_REF
11797 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11798 case UNSPEC_DTPOFF:
11799 disp = XVECEXP (disp, 0, 0);
11800 return (GET_CODE (disp) == SYMBOL_REF
11801 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11807 /* Recognizes RTL expressions that are valid memory addresses for an
11808 instruction. The MODE argument is the machine mode for the MEM
11809 expression that wants to use this address.
11811 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11812 convert common non-canonical forms to canonical form so that they will
11816 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11817 rtx addr, bool strict)
11819 struct ix86_address parts;
11820 rtx base, index, disp;
11821 HOST_WIDE_INT scale;
11823 if (ix86_decompose_address (addr, &parts) <= 0)
11824 /* Decomposition failed. */
11828 index = parts.index;
11830 scale = parts.scale;
11832 /* Validate base register. */
11839 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
11840 reg = SUBREG_REG (base);
11842 /* Base is not a register. */
11845 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
11848 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11849 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11850 /* Base is not valid. */
11854 /* Validate index register. */
11861 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
11862 reg = SUBREG_REG (index);
11864 /* Index is not a register. */
11867 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
11870 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11871 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11872 /* Index is not valid. */
11876 /* Index and base should have the same mode. */
11878 && GET_MODE (base) != GET_MODE (index))
11881 /* Validate scale factor. */
11885 /* Scale without index. */
11888 if (scale != 2 && scale != 4 && scale != 8)
11889 /* Scale is not a valid multiplier. */
11893 /* Validate displacement. */
11896 if (GET_CODE (disp) == CONST
11897 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11898 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11899 switch (XINT (XEXP (disp, 0), 1))
11901 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
11902 used. While the ABI also specifies 32bit relocations, we don't produce
11903 them at all and use IP-relative addressing instead. */
11905 case UNSPEC_GOTOFF:
11906 gcc_assert (flag_pic);
11908 goto is_legitimate_pic;
11910 /* 64bit address unspec. */
11913 case UNSPEC_GOTPCREL:
11915 gcc_assert (flag_pic);
11916 goto is_legitimate_pic;
11918 case UNSPEC_GOTTPOFF:
11919 case UNSPEC_GOTNTPOFF:
11920 case UNSPEC_INDNTPOFF:
11921 case UNSPEC_NTPOFF:
11922 case UNSPEC_DTPOFF:
11925 case UNSPEC_STACK_CHECK:
11926 gcc_assert (flag_split_stack);
11930 /* Invalid address unspec. */
11934 else if (SYMBOLIC_CONST (disp)
11938 && MACHOPIC_INDIRECT
11939 && !machopic_operand_p (disp)
11945 if (TARGET_64BIT && (index || base))
11947 /* foo@dtpoff(%rX) is ok. */
11948 if (GET_CODE (disp) != CONST
11949 || GET_CODE (XEXP (disp, 0)) != PLUS
11950 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11951 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11952 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11953 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11954 /* Non-constant pic memory reference. */
11957 else if ((!TARGET_MACHO || flag_pic)
11958 && ! legitimate_pic_address_disp_p (disp))
11959 /* Displacement is an invalid pic construct. */
11962 else if (MACHO_DYNAMIC_NO_PIC_P
11963 && !ix86_legitimate_constant_p (Pmode, disp))
11964 /* Displacement must be referenced via non_lazy_pointer. */
11968 /* This code used to verify that a symbolic pic displacement
11969 includes the pic_offset_table_rtx register.
11971 While this is a good idea, unfortunately these constructs may
11972 be created by "adds using lea" optimization for incorrect
11981 This code is nonsensical, but results in addressing the
11982 GOT table with pic_offset_table_rtx as the base. We can't
11983 just refuse it easily, since it gets matched by the
11984 "addsi3" pattern, which later gets split to lea when the
11985 output register differs from the input. While this
11986 could be handled by a separate addsi pattern for this case
11987 that never results in lea, disabling this test seems to be
11988 the easier and correct fix for the crash. */
11990 else if (GET_CODE (disp) != LABEL_REF
11991 && !CONST_INT_P (disp)
11992 && (GET_CODE (disp) != CONST
11993 || !ix86_legitimate_constant_p (Pmode, disp))
11994 && (GET_CODE (disp) != SYMBOL_REF
11995 || !ix86_legitimate_constant_p (Pmode, disp)))
11996 /* Displacement is not constant. */
11998 else if (TARGET_64BIT
11999 && !x86_64_immediate_operand (disp, VOIDmode))
12000 /* Displacement is out of range. */
12004 /* Everything looks valid. */
12008 /* Determine if a given RTX is a valid constant address. */
12011 constant_address_p (rtx x)
12013 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12016 /* Return a unique alias set for the GOT. */
12018 static alias_set_type
12019 ix86_GOT_alias_set (void)
12021 static alias_set_type set = -1;
12023 set = new_alias_set ();
12027 /* Return a legitimate reference for ORIG (an address) using the
12028 register REG. If REG is 0, a new pseudo is generated.
12030 There are two types of references that must be handled:
12032 1. Global data references must load the address from the GOT, via
12033 the PIC reg. An insn is emitted to do this load, and the reg is
12036 2. Static data references, constant pool addresses, and code labels
12037 compute the address as an offset from the GOT, whose base is in
12038 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12039 differentiate them from global data objects. The returned
12040 address is the PIC reg + an unspec constant.
12042 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12043 reg also appears in the address. */
12046 legitimize_pic_address (rtx orig, rtx reg)
12049 rtx new_rtx = orig;
12053 if (TARGET_MACHO && !TARGET_64BIT)
12056 reg = gen_reg_rtx (Pmode);
12057 /* Use the generic Mach-O PIC machinery. */
12058 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12062 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12064 else if (TARGET_64BIT
12065 && ix86_cmodel != CM_SMALL_PIC
12066 && gotoff_operand (addr, Pmode))
12069 /* This symbol may be referenced via a displacement from the PIC
12070 base address (@GOTOFF). */
12072 if (reload_in_progress)
12073 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12074 if (GET_CODE (addr) == CONST)
12075 addr = XEXP (addr, 0);
12076 if (GET_CODE (addr) == PLUS)
12078 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12080 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12083 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12084 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12086 tmpreg = gen_reg_rtx (Pmode);
12089 emit_move_insn (tmpreg, new_rtx);
12093 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12094 tmpreg, 1, OPTAB_DIRECT);
12097 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12099 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12101 /* This symbol may be referenced via a displacement from the PIC
12102 base address (@GOTOFF). */
12104 if (reload_in_progress)
12105 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12106 if (GET_CODE (addr) == CONST)
12107 addr = XEXP (addr, 0);
12108 if (GET_CODE (addr) == PLUS)
12110 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12112 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12115 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12116 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12117 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
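/* The address built here typically prints as, e.g.,
	leal	var@GOTOFF(%ebx), %eax
   (with 'var' a hypothetical local symbol): the PIC base register
   plus a link-time constant, with no GOT load needed.  */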
12121 emit_move_insn (reg, new_rtx);
12125 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12126 /* We can't use @GOTOFF for text labels on VxWorks;
12127 see gotoff_operand. */
12128 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12130 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12132 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12133 return legitimize_dllimport_symbol (addr, true);
12134 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12135 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12136 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12138 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12139 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12143 /* For x64 PE-COFF there is no GOT table. So we use address
12145 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12147 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12148 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12151 reg = gen_reg_rtx (Pmode);
12152 emit_move_insn (reg, new_rtx);
12155 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12157 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12158 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12159 new_rtx = gen_const_mem (Pmode, new_rtx);
12160 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12163 reg = gen_reg_rtx (Pmode);
12164 /* Use gen_movsi directly; otherwise the address is loaded
12165 into a register for CSE. We don't want to CSE these addresses;
12166 instead we CSE addresses from the GOT table, so skip this. */
12167 emit_insn (gen_movsi (reg, new_rtx));
12172 /* This symbol must be referenced via a load from the
12173 Global Offset Table (@GOT). */
12175 if (reload_in_progress)
12176 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12177 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12178 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12180 new_rtx = force_reg (Pmode, new_rtx);
12181 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12182 new_rtx = gen_const_mem (Pmode, new_rtx);
12183 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
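/* Here the symbol's address is instead loaded from the GOT, roughly:
	movl	var@GOT(%ebx), %eax
   (again with a hypothetical 'var'); the private alias set keeps
   these GOT loads from being confused with user memory accesses.  */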
12186 reg = gen_reg_rtx (Pmode);
12187 emit_move_insn (reg, new_rtx);
12193 if (CONST_INT_P (addr)
12194 && !x86_64_immediate_operand (addr, VOIDmode))
12198 emit_move_insn (reg, addr);
12202 new_rtx = force_reg (Pmode, addr);
12204 else if (GET_CODE (addr) == CONST)
12206 addr = XEXP (addr, 0);
12208 /* We must match stuff we generated before. Assume the only
12209 unspecs that can get here are ours. Not that we could do
12210 anything with them anyway.... */
12211 if (GET_CODE (addr) == UNSPEC
12212 || (GET_CODE (addr) == PLUS
12213 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12215 gcc_assert (GET_CODE (addr) == PLUS);
12217 if (GET_CODE (addr) == PLUS)
12219 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12221 /* Check first to see if this is a constant offset from a @GOTOFF
12222 symbol reference. */
12223 if (gotoff_operand (op0, Pmode)
12224 && CONST_INT_P (op1))
12228 if (reload_in_progress)
12229 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12230 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12232 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12233 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12234 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12238 emit_move_insn (reg, new_rtx);
12244 if (INTVAL (op1) < -16*1024*1024
12245 || INTVAL (op1) >= 16*1024*1024)
12247 if (!x86_64_immediate_operand (op1, Pmode))
12248 op1 = force_reg (Pmode, op1);
12249 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12255 base = legitimize_pic_address (XEXP (addr, 0), reg);
12256 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12257 base == reg ? NULL_RTX : reg);
12259 if (CONST_INT_P (new_rtx))
12260 new_rtx = plus_constant (base, INTVAL (new_rtx));
12263 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12265 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12266 new_rtx = XEXP (new_rtx, 1);
12268 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12276 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12279 get_thread_pointer (bool to_reg)
12281 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12283 if (GET_MODE (tp) != Pmode)
12284 tp = convert_to_mode (Pmode, tp, 1);
12287 tp = copy_addr_to_reg (tp);
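/* UNSPEC_TP stands for the thread pointer itself; on GNU/Linux this is
   the %fs segment base in 64-bit mode and the %gs segment base in
   32-bit mode, but the segment register is only chosen when the access
   is finally printed (see the '@' operand code below).  */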
12292 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12294 static GTY(()) rtx ix86_tls_symbol;
12297 ix86_tls_get_addr (void)
12299 if (!ix86_tls_symbol)
12302 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12303 ? "___tls_get_addr" : "__tls_get_addr");
12305 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12308 return ix86_tls_symbol;
12311 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12313 static GTY(()) rtx ix86_tls_module_base_symbol;
12316 ix86_tls_module_base (void)
12318 if (!ix86_tls_module_base_symbol)
12320 ix86_tls_module_base_symbol
12321 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12323 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12324 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12327 return ix86_tls_module_base_symbol;
12330 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12331 false if we expect this to be used for a memory address and true if
12332 we expect to load the address into a register. */
12335 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12337 rtx dest, base, off;
12338 rtx pic = NULL_RTX, tp = NULL_RTX;
12343 case TLS_MODEL_GLOBAL_DYNAMIC:
12344 dest = gen_reg_rtx (Pmode);
12349 pic = pic_offset_table_rtx;
12352 pic = gen_reg_rtx (Pmode);
12353 emit_insn (gen_set_got (pic));
12357 if (TARGET_GNU2_TLS)
12360 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12362 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12364 tp = get_thread_pointer (true);
12365 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12367 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12371 rtx caddr = ix86_tls_get_addr ();
12375 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12378 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12379 insns = get_insns ();
12382 RTL_CONST_CALL_P (insns) = 1;
12383 emit_libcall_block (insns, dest, rax, x);
12386 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
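/* A sketch of the classic 32-bit global-dynamic sequence that the
   call above stands for (scheduling and linker relaxation aside):

       leal  x@tlsgd(,%ebx,1), %eax
       call  ___tls_get_addr@PLT

   which leaves the address of X in %eax.  */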
12390 case TLS_MODEL_LOCAL_DYNAMIC:
12391 base = gen_reg_rtx (Pmode);
12396 pic = pic_offset_table_rtx;
12399 pic = gen_reg_rtx (Pmode);
12400 emit_insn (gen_set_got (pic));
12404 if (TARGET_GNU2_TLS)
12406 rtx tmp = ix86_tls_module_base ();
12409 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12411 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12413 tp = get_thread_pointer (true);
12414 set_unique_reg_note (get_last_insn (), REG_EQUAL,
12415 gen_rtx_MINUS (Pmode, tmp, tp));
12419 rtx caddr = ix86_tls_get_addr ();
12423 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12426 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12427 insns = get_insns ();
12430 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12431 share the LD_BASE result with other LD model accesses. */
12432 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12433 UNSPEC_TLS_LD_BASE);
12435 RTL_CONST_CALL_P (insns) = 1;
12436 emit_libcall_block (insns, base, rax, eqv);
12439 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12442 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12443 off = gen_rtx_CONST (Pmode, off);
12445 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12447 if (TARGET_GNU2_TLS)
12449 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12451 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
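/* Local-dynamic, illustratively: a single call computes the module
   base,

       leal  x@tlsldm(%ebx), %eax
       call  ___tls_get_addr@PLT

   and each variable is then a link-time x@dtpoff constant away from
   that base, so several LD accesses can share one call.  */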
12455 case TLS_MODEL_INITIAL_EXEC:
12458 if (TARGET_SUN_TLS)
12460 /* The Sun linker took the AMD64 TLS spec literally
12461 and can only handle %rax as the destination of the
12462 initial-exec code sequence.  */
12464 dest = gen_reg_rtx (Pmode);
12465 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12470 type = UNSPEC_GOTNTPOFF;
12474 if (reload_in_progress)
12475 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12476 pic = pic_offset_table_rtx;
12477 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12479 else if (!TARGET_ANY_GNU_TLS)
12481 pic = gen_reg_rtx (Pmode);
12482 emit_insn (gen_set_got (pic));
12483 type = UNSPEC_GOTTPOFF;
12488 type = UNSPEC_INDNTPOFF;
12491 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12492 off = gen_rtx_CONST (Pmode, off);
12494 off = gen_rtx_PLUS (Pmode, pic, off);
12495 off = gen_const_mem (Pmode, off);
12496 set_mem_alias_set (off, ix86_GOT_alias_set ());
12498 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12500 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12501 off = force_reg (Pmode, off);
12502 return gen_rtx_PLUS (Pmode, base, off);
12506 base = get_thread_pointer (true);
12507 dest = gen_reg_rtx (Pmode);
12508 emit_insn (gen_subsi3 (dest, base, off));
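/* Hedged sketches of the initial-exec forms built above (GNU TLS):

       movq  x@gottpoff(%rip), %reg   # 64-bit
       movl  x@gotntpoff(%ebx), %reg  # 32-bit

   followed by an add of the thread pointer; the offset is loaded from
   the GOT at run time, but no __tls_get_addr call is needed.  */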
12512 case TLS_MODEL_LOCAL_EXEC:
12513 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12514 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12515 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12516 off = gen_rtx_CONST (Pmode, off);
12518 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12520 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12521 return gen_rtx_PLUS (Pmode, base, off);
12525 base = get_thread_pointer (true);
12526 dest = gen_reg_rtx (Pmode);
12527 emit_insn (gen_subsi3 (dest, base, off));
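/* Local-exec is the cheapest model.  Under GNU TLS the typical 32-bit
   form is simply

       movl  %gs:0, %eax
       leal  x@ntpoff(%eax), %eax

   with the offset fixed at link time; the non-GNU path above instead
   subtracts x@tpoff from the thread pointer via gen_subsi3.  */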
12532 gcc_unreachable ();
12538 /* Create or return the unique __imp_DECL dllimport symbol corresponding to DECL.  */
12541 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12542 htab_t dllimport_map;
12545 get_dllimport_decl (tree decl)
12547 struct tree_map *h, in;
12550 const char *prefix;
12551 size_t namelen, prefixlen;
12556 if (!dllimport_map)
12557 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12559 in.hash = htab_hash_pointer (decl);
12560 in.base.from = decl;
12561 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12562 h = (struct tree_map *) *loc;
12566 *loc = h = ggc_alloc_tree_map ();
12568 h->base.from = decl;
12569 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12570 VAR_DECL, NULL, ptr_type_node);
12571 DECL_ARTIFICIAL (to) = 1;
12572 DECL_IGNORED_P (to) = 1;
12573 DECL_EXTERNAL (to) = 1;
12574 TREE_READONLY (to) = 1;
12576 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12577 name = targetm.strip_name_encoding (name);
12578 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12579 ? "*__imp_" : "*__imp__";
12580 namelen = strlen (name);
12581 prefixlen = strlen (prefix);
12582 imp_name = (char *) alloca (namelen + prefixlen + 1);
12583 memcpy (imp_name, prefix, prefixlen);
12584 memcpy (imp_name + prefixlen, name, namelen + 1);
12586 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12587 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12588 SET_SYMBOL_REF_DECL (rtl, to);
12589 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12591 rtl = gen_const_mem (Pmode, rtl);
12592 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12594 SET_DECL_RTL (to, rtl);
12595 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
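/* For example (hypothetical symbol), a dllimport reference to a cdecl
   function "foo" becomes a load through "*__imp__foo" when a user
   label prefix is in use, or "*__imp_foo" otherwise, matching the
   import-table slot the linker fills in.  */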
12600 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12601 true if we require the result be a register. */
12604 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12609 gcc_assert (SYMBOL_REF_DECL (symbol));
12610 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12612 x = DECL_RTL (imp_decl);
12614 x = force_reg (Pmode, x);
12618 /* Try machine-dependent ways of modifying an illegitimate address
12619 to be legitimate. If we find one, return the new, valid address.
12620 This macro is used in only one place: `memory_address' in explow.c.
12622 OLDX is the address as it was before break_out_memory_refs was called.
12623 In some cases it is useful to look at this to decide what needs to be done.
12625 It is always safe for this macro to do nothing. It exists to recognize
12626 opportunities to optimize the output.
12628 For the 80386, we handle X+REG by loading X into a register R and
12629 using R+REG. R will go in a general reg and indexing will be used.
12630 However, if REG is a broken-out memory address or multiplication,
12631 nothing needs to be done because REG can certainly go in a general reg.
12633 When -fpic is used, special handling is needed for symbolic references.
12634 See comments by legitimize_pic_address in i386.c for details. */
12637 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12638 enum machine_mode mode)
12643 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12645 return legitimize_tls_address (x, (enum tls_model) log, false);
12646 if (GET_CODE (x) == CONST
12647 && GET_CODE (XEXP (x, 0)) == PLUS
12648 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12649 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12651 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12652 (enum tls_model) log, false);
12653 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12656 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12658 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12659 return legitimize_dllimport_symbol (x, true);
12660 if (GET_CODE (x) == CONST
12661 && GET_CODE (XEXP (x, 0)) == PLUS
12662 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12663 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12665 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12666 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12670 if (flag_pic && SYMBOLIC_CONST (x))
12671 return legitimize_pic_address (x, 0);
12674 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12675 return machopic_indirect_data_reference (x, 0);
12678 /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
12679 if (GET_CODE (x) == ASHIFT
12680 && CONST_INT_P (XEXP (x, 1))
12681 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12684 log = INTVAL (XEXP (x, 1));
12685 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12686 GEN_INT (1 << log));
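/* E.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4)),
   which maps directly onto the index*scale part of an x86 effective
   address.  */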
12689 if (GET_CODE (x) == PLUS)
12691 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12693 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12694 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12695 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12698 log = INTVAL (XEXP (XEXP (x, 0), 1));
12699 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12700 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12701 GEN_INT (1 << log));
12704 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12705 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12706 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12709 log = INTVAL (XEXP (XEXP (x, 1), 1));
12710 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12711 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12712 GEN_INT (1 << log));
12715 /* Put multiply first if it isn't already. */
12716 if (GET_CODE (XEXP (x, 1)) == MULT)
12718 rtx tmp = XEXP (x, 0);
12719 XEXP (x, 0) = XEXP (x, 1);
12724 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12725 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12726 created by virtual register instantiation, register elimination, and
12727 similar optimizations. */
12728 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12731 x = gen_rtx_PLUS (Pmode,
12732 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12733 XEXP (XEXP (x, 1), 0)),
12734 XEXP (XEXP (x, 1), 1));
12738 /* Canonicalize (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12739 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
12740 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12741 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12742 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12743 && CONSTANT_P (XEXP (x, 1)))
12746 rtx other = NULL_RTX;
12748 if (CONST_INT_P (XEXP (x, 1)))
12750 constant = XEXP (x, 1);
12751 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12753 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12755 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12756 other = XEXP (x, 1);
12764 x = gen_rtx_PLUS (Pmode,
12765 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12766 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12767 plus_constant (other, INTVAL (constant)));
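/* Both rewrites above aim for the shape
   (plus (plus (mult reg const) reg) const), which is exactly the
   base + index*scale + disp form that ix86_decompose_address accepts,
   e.g. "leal 16(%ebx,%ecx,4), %eax".  */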
12771 if (changed && ix86_legitimate_address_p (mode, x, false))
12774 if (GET_CODE (XEXP (x, 0)) == MULT)
12777 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12780 if (GET_CODE (XEXP (x, 1)) == MULT)
12783 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12787 && REG_P (XEXP (x, 1))
12788 && REG_P (XEXP (x, 0)))
12791 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12794 x = legitimize_pic_address (x, 0);
12797 if (changed && ix86_legitimate_address_p (mode, x, false))
12800 if (REG_P (XEXP (x, 0)))
12802 rtx temp = gen_reg_rtx (Pmode);
12803 rtx val = force_operand (XEXP (x, 1), temp);
12806 if (GET_MODE (val) != Pmode)
12807 val = convert_to_mode (Pmode, val, 1);
12808 emit_move_insn (temp, val);
12811 XEXP (x, 1) = temp;
12815 else if (REG_P (XEXP (x, 1)))
12817 rtx temp = gen_reg_rtx (Pmode);
12818 rtx val = force_operand (XEXP (x, 0), temp);
12821 if (GET_MODE (val) != Pmode)
12822 val = convert_to_mode (Pmode, val, 1);
12823 emit_move_insn (temp, val);
12826 XEXP (x, 0) = temp;
12834 /* Print an integer constant expression in assembler syntax. Addition
12835 and subtraction are the only arithmetic that may appear in these
12836 expressions. FILE is the stdio stream to write to, X is the rtx, and
12837 CODE is the operand print code from the output string. */
12840 output_pic_addr_const (FILE *file, rtx x, int code)
12844 switch (GET_CODE (x))
12847 gcc_assert (flag_pic);
12852 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12853 output_addr_const (file, x);
12856 const char *name = XSTR (x, 0);
12858 /* Mark the decl as referenced so that cgraph will
12859 output the function. */
12860 if (SYMBOL_REF_DECL (x))
12861 mark_decl_referenced (SYMBOL_REF_DECL (x));
12864 if (MACHOPIC_INDIRECT
12865 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12866 name = machopic_indirection_name (x, /*stub_p=*/true);
12868 assemble_name (file, name);
12870 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12871 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12872 fputs ("@PLT", file);
12879 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12880 assemble_name (asm_out_file, buf);
12884 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12888 /* This used to output parentheses around the expression,
12889 but that does not work on the 386 (either ATT or BSD assembler). */
12890 output_pic_addr_const (file, XEXP (x, 0), code);
12894 if (GET_MODE (x) == VOIDmode)
12896 /* We can use %d if the number is <32 bits and positive. */
12897 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12898 fprintf (file, "0x%lx%08lx",
12899 (unsigned long) CONST_DOUBLE_HIGH (x),
12900 (unsigned long) CONST_DOUBLE_LOW (x));
12902 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12905 /* We can't handle floating point constants;
12906 TARGET_PRINT_OPERAND must handle them. */
12907 output_operand_lossage ("floating constant misused");
12911 /* Some assemblers need integer constants to appear first. */
12912 if (CONST_INT_P (XEXP (x, 0)))
12914 output_pic_addr_const (file, XEXP (x, 0), code);
12916 output_pic_addr_const (file, XEXP (x, 1), code);
12920 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12921 output_pic_addr_const (file, XEXP (x, 1), code);
12923 output_pic_addr_const (file, XEXP (x, 0), code);
12929 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12930 output_pic_addr_const (file, XEXP (x, 0), code);
12932 output_pic_addr_const (file, XEXP (x, 1), code);
12934 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12938 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12940 bool f = i386_asm_output_addr_const_extra (file, x);
12945 gcc_assert (XVECLEN (x, 0) == 1);
12946 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12947 switch (XINT (x, 1))
12950 fputs ("@GOT", file);
12952 case UNSPEC_GOTOFF:
12953 fputs ("@GOTOFF", file);
12955 case UNSPEC_PLTOFF:
12956 fputs ("@PLTOFF", file);
12959 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12960 "(%rip)" : "[rip]", file);
12962 case UNSPEC_GOTPCREL:
12963 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12964 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12966 case UNSPEC_GOTTPOFF:
12967 /* FIXME: This might be @TPOFF in Sun ld too. */
12968 fputs ("@gottpoff", file);
12971 fputs ("@tpoff", file);
12973 case UNSPEC_NTPOFF:
12975 fputs ("@tpoff", file);
12977 fputs ("@ntpoff", file);
12979 case UNSPEC_DTPOFF:
12980 fputs ("@dtpoff", file);
12982 case UNSPEC_GOTNTPOFF:
12984 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12985 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12987 fputs ("@gotntpoff", file);
12989 case UNSPEC_INDNTPOFF:
12990 fputs ("@indntpoff", file);
12993 case UNSPEC_MACHOPIC_OFFSET:
12995 machopic_output_function_base_name (file);
12999 output_operand_lossage ("invalid UNSPEC as operand");
13005 output_operand_lossage ("invalid expression as operand");
13009 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13010 We need to emit DTP-relative relocations. */
13012 static void ATTRIBUTE_UNUSED
13013 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13015 fputs (ASM_LONG, file);
13016 output_addr_const (file, x);
13017 fputs ("@dtpoff", file);
13023 fputs (", 0", file);
13026 gcc_unreachable ();
13030 /* Return true if X is a representation of the PIC register. This copes
13031 with calls from ix86_find_base_term, where the register might have
13032 been replaced by a cselib value. */
13035 ix86_pic_register_p (rtx x)
13037 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13038 return (pic_offset_table_rtx
13039 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13041 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13044 /* Helper function for ix86_delegitimize_address.
13045 Attempt to delegitimize TLS local-exec accesses. */
13048 ix86_delegitimize_tls_address (rtx orig_x)
13050 rtx x = orig_x, unspec;
13051 struct ix86_address addr;
13053 if (!TARGET_TLS_DIRECT_SEG_REFS)
13057 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13059 if (ix86_decompose_address (x, &addr) == 0
13060 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13061 || addr.disp == NULL_RTX
13062 || GET_CODE (addr.disp) != CONST)
13064 unspec = XEXP (addr.disp, 0);
13065 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13066 unspec = XEXP (unspec, 0);
13067 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13069 x = XVECEXP (unspec, 0, 0);
13070 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13071 if (unspec != XEXP (addr.disp, 0))
13072 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13075 rtx idx = addr.index;
13076 if (addr.scale != 1)
13077 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13078 x = gen_rtx_PLUS (Pmode, idx, x);
13081 x = gen_rtx_PLUS (Pmode, addr.base, x);
13082 if (MEM_P (orig_x))
13083 x = replace_equiv_address_nv (orig_x, x);
13087 /* In the name of slightly smaller debug output, and to cater to
13088 general assembler lossage, recognize PIC+GOTOFF and turn it back
13089 into a direct symbol reference.
13091 On Darwin, this is necessary to avoid a crash, because Darwin
13092 has a different PIC label for each routine but the DWARF debugging
13093 information is not associated with any particular routine, so it's
13094 necessary to remove references to the PIC label from RTL stored by
13095 the DWARF output code. */
13098 ix86_delegitimize_address (rtx x)
13100 rtx orig_x = delegitimize_mem_from_attrs (x);
13101 /* addend is NULL or some rtx if x is something+GOTOFF where
13102 something doesn't include the PIC register. */
13103 rtx addend = NULL_RTX;
13104 /* reg_addend is NULL or a multiple of some register. */
13105 rtx reg_addend = NULL_RTX;
13106 /* const_addend is NULL or a const_int. */
13107 rtx const_addend = NULL_RTX;
13108 /* This is the result, or NULL. */
13109 rtx result = NULL_RTX;
13118 if (GET_CODE (x) != CONST
13119 || GET_CODE (XEXP (x, 0)) != UNSPEC
13120 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13121 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13122 || !MEM_P (orig_x))
13123 return ix86_delegitimize_tls_address (orig_x);
13124 x = XVECEXP (XEXP (x, 0), 0, 0);
13125 if (GET_MODE (orig_x) != GET_MODE (x))
13127 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13135 if (GET_CODE (x) != PLUS
13136 || GET_CODE (XEXP (x, 1)) != CONST)
13137 return ix86_delegitimize_tls_address (orig_x);
13139 if (ix86_pic_register_p (XEXP (x, 0)))
13140 /* %ebx + GOT/GOTOFF */
13142 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13144 /* %ebx + %reg * scale + GOT/GOTOFF */
13145 reg_addend = XEXP (x, 0);
13146 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13147 reg_addend = XEXP (reg_addend, 1);
13148 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13149 reg_addend = XEXP (reg_addend, 0);
13152 reg_addend = NULL_RTX;
13153 addend = XEXP (x, 0);
13157 addend = XEXP (x, 0);
13159 x = XEXP (XEXP (x, 1), 0);
13160 if (GET_CODE (x) == PLUS
13161 && CONST_INT_P (XEXP (x, 1)))
13163 const_addend = XEXP (x, 1);
13167 if (GET_CODE (x) == UNSPEC
13168 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13169 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13170 result = XVECEXP (x, 0, 0);
13172 if (TARGET_MACHO && darwin_local_data_pic (x)
13173 && !MEM_P (orig_x))
13174 result = XVECEXP (x, 0, 0);
13177 return ix86_delegitimize_tls_address (orig_x);
13180 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13182 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13185 /* If the rest of original X doesn't involve the PIC register, add
13186 addend and subtract pic_offset_table_rtx. This can happen e.g.
13188 leal (%ebx, %ecx, 4), %ecx
13190 movl foo@GOTOFF(%ecx), %edx
13191 in which case we return (%ecx - %ebx) + foo. */
13192 if (pic_offset_table_rtx)
13193 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13194 pic_offset_table_rtx),
13199 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13201 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13202 if (result == NULL_RTX)
13208 /* If X is a machine specific address (i.e. a symbol or label being
13209 referenced as a displacement from the GOT implemented using an
13210 UNSPEC), then return the base term. Otherwise return X. */
13213 ix86_find_base_term (rtx x)
13219 if (GET_CODE (x) != CONST)
13221 term = XEXP (x, 0);
13222 if (GET_CODE (term) == PLUS
13223 && (CONST_INT_P (XEXP (term, 1))
13224 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13225 term = XEXP (term, 0);
13226 if (GET_CODE (term) != UNSPEC
13227 || (XINT (term, 1) != UNSPEC_GOTPCREL
13228 && XINT (term, 1) != UNSPEC_PCREL))
13231 return XVECEXP (term, 0, 0);
13234 return ix86_delegitimize_address (x);
13238 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13239 int fp, FILE *file)
13241 const char *suffix;
13243 if (mode == CCFPmode || mode == CCFPUmode)
13245 code = ix86_fp_compare_code_to_integer (code);
13249 code = reverse_condition (code);
13300 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13304 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13305 Those same assemblers have the same but opposite lossage on cmov. */
13306 if (mode == CCmode)
13307 suffix = fp ? "nbe" : "a";
13308 else if (mode == CCCmode)
13311 gcc_unreachable ();
13327 gcc_unreachable ();
13331 gcc_assert (mode == CCmode || mode == CCCmode);
13348 gcc_unreachable ();
13352 /* ??? As above. */
13353 gcc_assert (mode == CCmode || mode == CCCmode);
13354 suffix = fp ? "nb" : "ae";
13357 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13361 /* ??? As above. */
13362 if (mode == CCmode)
13364 else if (mode == CCCmode)
13365 suffix = fp ? "nb" : "ae";
13367 gcc_unreachable ();
13370 suffix = fp ? "u" : "p";
13373 suffix = fp ? "nu" : "np";
13376 gcc_unreachable ();
13378 fputs (suffix, file);
13381 /* Print the name of register X to FILE based on its machine mode and number.
13382 If CODE is 'w', pretend the mode is HImode.
13383 If CODE is 'b', pretend the mode is QImode.
13384 If CODE is 'k', pretend the mode is SImode.
13385 If CODE is 'q', pretend the mode is DImode.
13386 If CODE is 'x', pretend the mode is V4SFmode.
13387 If CODE is 't', pretend the mode is V8SFmode.
13388 If CODE is 'h', pretend the reg is the 'high' byte register.
13389 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
13390 If CODE is 'd', duplicate the operand for AVX instruction.
13394 print_reg (rtx x, int code, FILE *file)
13397 bool duplicated = code == 'd' && TARGET_AVX;
13399 gcc_assert (x == pc_rtx
13400 || (REGNO (x) != ARG_POINTER_REGNUM
13401 && REGNO (x) != FRAME_POINTER_REGNUM
13402 && REGNO (x) != FLAGS_REG
13403 && REGNO (x) != FPSR_REG
13404 && REGNO (x) != FPCR_REG));
13406 if (ASSEMBLER_DIALECT == ASM_ATT)
13411 gcc_assert (TARGET_64BIT);
13412 fputs ("rip", file);
13416 if (code == 'w' || MMX_REG_P (x))
13418 else if (code == 'b')
13420 else if (code == 'k')
13422 else if (code == 'q')
13424 else if (code == 'y')
13426 else if (code == 'h')
13428 else if (code == 'x')
13430 else if (code == 't')
13433 code = GET_MODE_SIZE (GET_MODE (x));
13435 /* Irritatingly, AMD extended registers use a different naming convention
13436 from the normal registers.  */
13437 if (REX_INT_REG_P (x))
13439 gcc_assert (TARGET_64BIT);
13443 error ("extended registers have no high halves");
13446 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13449 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13452 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13455 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13458 error ("unsupported operand size for extended register");
13468 if (STACK_TOP_P (x))
13477 if (! ANY_FP_REG_P (x))
13478 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13483 reg = hi_reg_name[REGNO (x)];
13486 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13488 reg = qi_reg_name[REGNO (x)];
13491 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13493 reg = qi_high_reg_name[REGNO (x)];
13498 gcc_assert (!duplicated);
13500 fputs (hi_reg_name[REGNO (x)] + 1, file);
13505 gcc_unreachable ();
13511 if (ASSEMBLER_DIALECT == ASM_ATT)
13512 fprintf (file, ", %%%s", reg);
13514 fprintf (file, ", %s", reg);
13518 /* Locate some local-dynamic symbol still in use by this function
13519 so that we can print its name in some tls_local_dynamic_base pattern.  */
13523 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13527 if (GET_CODE (x) == SYMBOL_REF
13528 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13530 cfun->machine->some_ld_name = XSTR (x, 0);
13537 static const char *
13538 get_some_local_dynamic_name (void)
13542 if (cfun->machine->some_ld_name)
13543 return cfun->machine->some_ld_name;
13545 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13546 if (NONDEBUG_INSN_P (insn)
13547 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13548 return cfun->machine->some_ld_name;
13553 /* Meaning of CODE:
13554 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13555 C -- print opcode suffix for set/cmov insn.
13556 c -- like C, but print reversed condition
13557 F,f -- likewise, but for floating-point.
13558 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", otherwise nothing
13560 R -- print the prefix for register names.
13561 z -- print the opcode suffix for the size of the current operand.
13562 Z -- likewise, with special suffixes for x87 instructions.
13563 * -- print a star (in certain assembler syntax)
13564 A -- print an absolute memory reference.
13565 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13566 s -- print a shift double count, followed by the assembler's argument delimiter.
13568 b -- print the QImode name of the register for the indicated operand.
13569 %b0 would print %al if operands[0] is reg 0.
13570 w -- likewise, print the HImode name of the register.
13571 k -- likewise, print the SImode name of the register.
13572 q -- likewise, print the DImode name of the register.
13573 x -- likewise, print the V4SFmode name of the register.
13574 t -- likewise, print the V8SFmode name of the register.
13575 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13576 y -- print "st(0)" instead of "st" as a register.
13577 d -- print duplicated register operand for AVX instruction.
13578 D -- print condition for SSE cmp instruction.
13579 P -- if PIC, print an @PLT suffix.
13580 p -- print raw symbol name.
13581 X -- don't print any sort of PIC '@' suffix for a symbol.
13582 & -- print some in-use local-dynamic symbol name.
13583 H -- print a memory address offset by 8; used for sse high-parts
13584 Y -- print condition for XOP pcom* instruction.
13585 + -- print a branch hint as 'cs' or 'ds' prefix
13586 ; -- print a semicolon (after prefixes, due to a bug in older gas).
13587 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13588 @ -- print the segment register of a thread base pointer load
13592 ix86_print_operand (FILE *file, rtx x, int code)
13599 if (ASSEMBLER_DIALECT == ASM_ATT)
13605 const char *name = get_some_local_dynamic_name ();
13607 output_operand_lossage ("'%%&' used without any "
13608 "local dynamic TLS references");
13610 assemble_name (file, name);
13615 switch (ASSEMBLER_DIALECT)
13622 /* Intel syntax. For absolute addresses, registers should not
13623 be surrounded by braces. */
13627 ix86_print_operand (file, x, 0);
13634 gcc_unreachable ();
13637 ix86_print_operand (file, x, 0);
13642 if (ASSEMBLER_DIALECT == ASM_ATT)
13647 if (ASSEMBLER_DIALECT == ASM_ATT)
13652 if (ASSEMBLER_DIALECT == ASM_ATT)
13657 if (ASSEMBLER_DIALECT == ASM_ATT)
13662 if (ASSEMBLER_DIALECT == ASM_ATT)
13667 if (ASSEMBLER_DIALECT == ASM_ATT)
13672 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13674 /* Opcodes don't get size suffixes if using Intel opcodes. */
13675 if (ASSEMBLER_DIALECT == ASM_INTEL)
13678 switch (GET_MODE_SIZE (GET_MODE (x)))
13697 output_operand_lossage
13698 ("invalid operand size for operand code '%c'", code);
13703 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13705 warning (0, "non-integer operand used with operand code '%c'", code);
13709 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13710 if (ASSEMBLER_DIALECT == ASM_INTEL)
13713 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13715 switch (GET_MODE_SIZE (GET_MODE (x)))
13718 #ifdef HAVE_AS_IX86_FILDS
13728 #ifdef HAVE_AS_IX86_FILDQ
13731 fputs ("ll", file);
13739 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13741 /* 387 opcodes don't get size suffixes
13742 if the operands are registers. */
13743 if (STACK_REG_P (x))
13746 switch (GET_MODE_SIZE (GET_MODE (x)))
13767 output_operand_lossage
13768 ("invalid operand type used with operand code '%c'", code);
13772 output_operand_lossage
13773 ("invalid operand size for operand code '%c'", code);
13791 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13793 ix86_print_operand (file, x, 0);
13794 fputs (", ", file);
13799 /* A little bit of braindamage here.  The SSE compare instructions
13800 use completely different names for the comparisons than the
13801 fp conditional moves do.  */
13804 switch (GET_CODE (x))
13807 fputs ("eq", file);
13810 fputs ("eq_us", file);
13813 fputs ("lt", file);
13816 fputs ("nge", file);
13819 fputs ("le", file);
13822 fputs ("ngt", file);
13825 fputs ("unord", file);
13828 fputs ("neq", file);
13831 fputs ("neq_oq", file);
13834 fputs ("ge", file);
13837 fputs ("nlt", file);
13840 fputs ("gt", file);
13843 fputs ("nle", file);
13846 fputs ("ord", file);
13849 output_operand_lossage ("operand is not a condition code, "
13850 "invalid operand code 'D'");
13856 switch (GET_CODE (x))
13860 fputs ("eq", file);
13864 fputs ("lt", file);
13868 fputs ("le", file);
13871 fputs ("unord", file);
13875 fputs ("neq", file);
13879 fputs ("nlt", file);
13883 fputs ("nle", file);
13886 fputs ("ord", file);
13889 output_operand_lossage ("operand is not a condition code, "
13890 "invalid operand code 'D'");
13896 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13897 if (ASSEMBLER_DIALECT == ASM_ATT)
13899 switch (GET_MODE (x))
13901 case HImode: putc ('w', file); break;
13903 case SFmode: putc ('l', file); break;
13905 case DFmode: putc ('q', file); break;
13906 default: gcc_unreachable ();
13913 if (!COMPARISON_P (x))
13915 output_operand_lossage ("operand is neither a constant nor a "
13916 "condition code, invalid operand code "
13920 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13923 if (!COMPARISON_P (x))
13925 output_operand_lossage ("operand is neither a constant nor a "
13926 "condition code, invalid operand code "
13930 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13931 if (ASSEMBLER_DIALECT == ASM_ATT)
13934 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13937 /* Like above, but reverse condition */
13939 /* Check to see if argument to %c is really a constant
13940 and not a condition code which needs to be reversed. */
13941 if (!COMPARISON_P (x))
13943 output_operand_lossage ("operand is neither a constant nor a "
13944 "condition code, invalid operand "
13948 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13951 if (!COMPARISON_P (x))
13953 output_operand_lossage ("operand is neither a constant nor a "
13954 "condition code, invalid operand "
13958 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13959 if (ASSEMBLER_DIALECT == ASM_ATT)
13962 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13966 /* It doesn't actually matter what mode we use here, as we're
13967 only going to use this for printing. */
13968 x = adjust_address_nv (x, DImode, 8);
13976 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13979 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13982 int pred_val = INTVAL (XEXP (x, 0));
13984 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13985 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13987 int taken = pred_val > REG_BR_PROB_BASE / 2;
13988 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13990 /* Emit hints only when the default branch prediction
13991 heuristics would fail.  */
13992 if (taken != cputaken)
13994 /* We use 3e (DS) prefix for taken branches and
13995 2e (CS) prefix for not taken branches. */
13997 fputs ("ds ; ", file);
13999 fputs ("cs ; ", file);
14007 switch (GET_CODE (x))
14010 fputs ("neq", file);
14013 fputs ("eq", file);
14017 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14021 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14025 fputs ("le", file);
14029 fputs ("lt", file);
14032 fputs ("unord", file);
14035 fputs ("ord", file);
14038 fputs ("ueq", file);
14041 fputs ("nlt", file);
14044 fputs ("nle", file);
14047 fputs ("ule", file);
14050 fputs ("ult", file);
14053 fputs ("une", file);
14056 output_operand_lossage ("operand is not a condition code, "
14057 "invalid operand code 'Y'");
14063 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14069 if (ASSEMBLER_DIALECT == ASM_ATT)
14072 /* The kernel uses a different segment register for performance
14073 reasons; a system call would not have to trash the userspace
14074 segment register, which would be expensive. */
14075 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14076 fputs ("fs", file);
14078 fputs ("gs", file);
14082 putc (TARGET_AVX2 ? 'i' : 'f', file);
14086 output_operand_lossage ("invalid operand code '%c'", code);
14091 print_reg (x, code, file);
14093 else if (MEM_P (x))
14095 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14096 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14097 && GET_MODE (x) != BLKmode)
14100 switch (GET_MODE_SIZE (GET_MODE (x)))
14102 case 1: size = "BYTE"; break;
14103 case 2: size = "WORD"; break;
14104 case 4: size = "DWORD"; break;
14105 case 8: size = "QWORD"; break;
14106 case 12: size = "TBYTE"; break;
14108 if (GET_MODE (x) == XFmode)
14113 case 32: size = "YMMWORD"; break;
14115 gcc_unreachable ();
14118 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14121 else if (code == 'w')
14123 else if (code == 'k')
14126 fputs (size, file);
14127 fputs (" PTR ", file);
14131 /* Avoid (%rip) for call operands. */
14132 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14133 && !CONST_INT_P (x))
14134 output_addr_const (file, x);
14135 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14136 output_operand_lossage ("invalid constraints for operand");
14138 output_address (x);
14141 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14146 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14147 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14149 if (ASSEMBLER_DIALECT == ASM_ATT)
14151 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14153 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14155 fprintf (file, "0x%08x", (unsigned int) l);
14158 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14163 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14164 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14166 if (ASSEMBLER_DIALECT == ASM_ATT)
14168 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14171 /* These float cases don't actually occur as immediate operands. */
14172 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14176 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14177 fputs (dstr, file);
14182 /* We have patterns that allow zero sets of memory, for instance.
14183 In 64-bit mode, we should probably support all 8-byte vectors,
14184 since we can in fact encode that into an immediate. */
14185 if (GET_CODE (x) == CONST_VECTOR)
14187 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14191 if (code != 'P' && code != 'p')
14193 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14195 if (ASSEMBLER_DIALECT == ASM_ATT)
14198 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14199 || GET_CODE (x) == LABEL_REF)
14201 if (ASSEMBLER_DIALECT == ASM_ATT)
14204 fputs ("OFFSET FLAT:", file);
14207 if (CONST_INT_P (x))
14208 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14209 else if (flag_pic || MACHOPIC_INDIRECT)
14210 output_pic_addr_const (file, x, code);
14212 output_addr_const (file, x);
14217 ix86_print_operand_punct_valid_p (unsigned char code)
14219 return (code == '@' || code == '*' || code == '+'
14220 || code == '&' || code == ';' || code == '~');
14223 /* Print a memory operand whose address is ADDR. */
14226 ix86_print_operand_address (FILE *file, rtx addr)
14228 struct ix86_address parts;
14229 rtx base, index, disp;
14231 int ok = ix86_decompose_address (addr, &parts);
14235 if (parts.base && GET_CODE (parts.base) == SUBREG)
14237 rtx tmp = SUBREG_REG (parts.base);
14238 parts.base = simplify_subreg (GET_MODE (parts.base),
14239 tmp, GET_MODE (tmp), 0);
14242 if (parts.index && GET_CODE (parts.index) == SUBREG)
14244 rtx tmp = SUBREG_REG (parts.index);
14245 parts.index = simplify_subreg (GET_MODE (parts.index),
14246 tmp, GET_MODE (tmp), 0);
14250 index = parts.index;
14252 scale = parts.scale;
14260 if (ASSEMBLER_DIALECT == ASM_ATT)
14262 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14265 gcc_unreachable ();
14268 /* Use the one byte shorter RIP-relative addressing for 64-bit mode.  */
14269 if (TARGET_64BIT && !base && !index)
14273 if (GET_CODE (disp) == CONST
14274 && GET_CODE (XEXP (disp, 0)) == PLUS
14275 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14276 symbol = XEXP (XEXP (disp, 0), 0);
14278 if (GET_CODE (symbol) == LABEL_REF
14279 || (GET_CODE (symbol) == SYMBOL_REF
14280 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14283 if (!base && !index)
14285 /* A displacement-only address requires special attention.  */
14287 if (CONST_INT_P (disp))
14289 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14290 fputs ("ds:", file);
14291 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14294 output_pic_addr_const (file, disp, 0);
14296 output_addr_const (file, disp);
14302 /* Print SImode registers for zero-extended addresses to force
14303 addr32 prefix. Otherwise print DImode registers to avoid it. */
14305 code = ((GET_CODE (addr) == ZERO_EXTEND
14306 || GET_CODE (addr) == AND)
14310 if (ASSEMBLER_DIALECT == ASM_ATT)
14315 output_pic_addr_const (file, disp, 0);
14316 else if (GET_CODE (disp) == LABEL_REF)
14317 output_asm_label (disp);
14319 output_addr_const (file, disp);
14324 print_reg (base, code, file);
14328 print_reg (index, code, file);
14330 fprintf (file, ",%d", scale);
14336 rtx offset = NULL_RTX;
14340 /* Pull out the offset of a symbol; print any symbol itself. */
14341 if (GET_CODE (disp) == CONST
14342 && GET_CODE (XEXP (disp, 0)) == PLUS
14343 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14345 offset = XEXP (XEXP (disp, 0), 1);
14346 disp = gen_rtx_CONST (VOIDmode,
14347 XEXP (XEXP (disp, 0), 0));
14351 output_pic_addr_const (file, disp, 0);
14352 else if (GET_CODE (disp) == LABEL_REF)
14353 output_asm_label (disp);
14354 else if (CONST_INT_P (disp))
14357 output_addr_const (file, disp);
14363 print_reg (base, code, file);
14366 if (INTVAL (offset) >= 0)
14368 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14372 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14379 print_reg (index, code, file);
14381 fprintf (file, "*%d", scale);
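/* For one and the same address the two dialects print, illustratively:

       AT&T:   -4(%ebx,%ecx,8)
       Intel:  [ebx+ecx*8-4]

   i.e. identical base, index, scale and displacement, just different
   punctuation.  */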
14388 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14391 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14395 if (GET_CODE (x) != UNSPEC)
14398 op = XVECEXP (x, 0, 0);
14399 switch (XINT (x, 1))
14401 case UNSPEC_GOTTPOFF:
14402 output_addr_const (file, op);
14403 /* FIXME: This might be @TPOFF in Sun ld. */
14404 fputs ("@gottpoff", file);
14407 output_addr_const (file, op);
14408 fputs ("@tpoff", file);
14410 case UNSPEC_NTPOFF:
14411 output_addr_const (file, op);
14413 fputs ("@tpoff", file);
14415 fputs ("@ntpoff", file);
14417 case UNSPEC_DTPOFF:
14418 output_addr_const (file, op);
14419 fputs ("@dtpoff", file);
14421 case UNSPEC_GOTNTPOFF:
14422 output_addr_const (file, op);
14424 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14425 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14427 fputs ("@gotntpoff", file);
14429 case UNSPEC_INDNTPOFF:
14430 output_addr_const (file, op);
14431 fputs ("@indntpoff", file);
14434 case UNSPEC_MACHOPIC_OFFSET:
14435 output_addr_const (file, op);
14437 machopic_output_function_base_name (file);
14441 case UNSPEC_STACK_CHECK:
14445 gcc_assert (flag_split_stack);
14447 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14448 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14450 gcc_unreachable ();
14453 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14464 /* Split one or more double-mode RTL references into pairs of half-mode
14465 references. The RTL can be REG, offsettable MEM, integer constant, or
14466 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14467 split and "num" is its length. lo_half and hi_half are output arrays
14468 that parallel "operands". */
14471 split_double_mode (enum machine_mode mode, rtx operands[],
14472 int num, rtx lo_half[], rtx hi_half[])
14474 enum machine_mode half_mode;
14480 half_mode = DImode;
14483 half_mode = SImode;
14486 gcc_unreachable ();
14489 byte = GET_MODE_SIZE (half_mode);
14493 rtx op = operands[num];
14495 /* simplify_subreg refuses to split volatile memory addresses,
14496 but we still have to handle them.  */
14499 lo_half[num] = adjust_address (op, half_mode, 0);
14500 hi_half[num] = adjust_address (op, half_mode, byte);
14504 lo_half[num] = simplify_gen_subreg (half_mode, op,
14505 GET_MODE (op) == VOIDmode
14506 ? mode : GET_MODE (op), 0);
14507 hi_half[num] = simplify_gen_subreg (half_mode, op,
14508 GET_MODE (op) == VOIDmode
14509 ? mode : GET_MODE (op), byte);
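/* For instance, on a 32-bit target a DImode operand splits into two
   SImode halves at byte offsets 0 and 4: MEMs via adjust_address,
   everything else via simplify_gen_subreg.  */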
14514 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14515 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14516 is the expression of the binary operation. The output may either be
14517 emitted here, or returned to the caller, like all output_* functions.
14519 There is no guarantee that the operands are the same mode, as they
14520 might be within FLOAT or FLOAT_EXTEND expressions. */
14522 #ifndef SYSV386_COMPAT
14523 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14524 wants to fix the assemblers because that causes incompatibility
14525 with gcc. No-one wants to fix gcc because that causes
14526 incompatibility with assemblers... You can use the option of
14527 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14528 #define SYSV386_COMPAT 1
14532 output_387_binary_op (rtx insn, rtx *operands)
14534 static char buf[40];
14537 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14539 #ifdef ENABLE_CHECKING
14540 /* Even if we do not want to check the inputs, this documents the
14541 input constraints, which helps in understanding the following code.  */
14542 if (STACK_REG_P (operands[0])
14543 && ((REG_P (operands[1])
14544 && REGNO (operands[0]) == REGNO (operands[1])
14545 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14546 || (REG_P (operands[2])
14547 && REGNO (operands[0]) == REGNO (operands[2])
14548 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14549 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14552 gcc_assert (is_sse);
14555 switch (GET_CODE (operands[3]))
14558 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14559 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14567 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14568 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14576 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14577 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14585 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14586 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14594 gcc_unreachable ();
14601 strcpy (buf, ssep);
14602 if (GET_MODE (operands[0]) == SFmode)
14603 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14605 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14609 strcpy (buf, ssep + 1);
14610 if (GET_MODE (operands[0]) == SFmode)
14611 strcat (buf, "ss\t{%2, %0|%0, %2}");
14613 strcat (buf, "sd\t{%2, %0|%0, %2}");
14619 switch (GET_CODE (operands[3]))
14623 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14625 rtx temp = operands[2];
14626 operands[2] = operands[1];
14627 operands[1] = temp;
14630 /* We know operands[0] == operands[1].  */
14632 if (MEM_P (operands[2]))
14638 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14640 if (STACK_TOP_P (operands[0]))
14641 /* How is it that we are storing to a dead operand[2]?
14642 Well, presumably operands[1] is dead too. We can't
14643 store the result to st(0) as st(0) gets popped on this
14644 instruction. Instead store to operands[2] (which I
14645 think has to be st(1)). st(1) will be popped later.
14646 gcc <= 2.8.1 didn't have this check and generated
14647 assembly code that the Unixware assembler rejected. */
14648 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14650 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14654 if (STACK_TOP_P (operands[0]))
14655 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14657 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14662 if (MEM_P (operands[1]))
14668 if (MEM_P (operands[2]))
14674 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14677 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14678 derived assemblers, confusingly reverse the direction of
14679 the operation for fsub{r} and fdiv{r} when the
14680 destination register is not st(0). The Intel assembler
14681 doesn't have this brain damage. Read !SYSV386_COMPAT to
14682 figure out what the hardware really does. */
14683 if (STACK_TOP_P (operands[0]))
14684 p = "{p\t%0, %2|rp\t%2, %0}";
14686 p = "{rp\t%2, %0|p\t%0, %2}";
14688 if (STACK_TOP_P (operands[0]))
14689 /* As above for fmul/fadd, we can't store to st(0). */
14690 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14692 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14697 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14700 if (STACK_TOP_P (operands[0]))
14701 p = "{rp\t%0, %1|p\t%1, %0}";
14703 p = "{p\t%1, %0|rp\t%0, %1}";
14705 if (STACK_TOP_P (operands[0]))
14706 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14708 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14713 if (STACK_TOP_P (operands[0]))
14715 if (STACK_TOP_P (operands[1]))
14716 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14718 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14721 else if (STACK_TOP_P (operands[1]))
14724 p = "{\t%1, %0|r\t%0, %1}";
14726 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14732 p = "{r\t%2, %0|\t%0, %2}";
14734 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14740 gcc_unreachable ();
14747 /* Return needed mode for entity in optimize_mode_switching pass. */
14750 ix86_mode_needed (int entity, rtx insn)
14752 enum attr_i387_cw mode;
14754 /* The mode UNINITIALIZED is used to store the control word after a
14755 function call or ASM pattern.  The mode ANY specifies that the
14756 function has no requirements on the control word and makes no changes
14757 in the bits we are interested in.  */
14760 || (NONJUMP_INSN_P (insn)
14761 && (asm_noperands (PATTERN (insn)) >= 0
14762 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14763 return I387_CW_UNINITIALIZED;
14765 if (recog_memoized (insn) < 0)
14766 return I387_CW_ANY;
14768 mode = get_attr_i387_cw (insn);
14773 if (mode == I387_CW_TRUNC)
14778 if (mode == I387_CW_FLOOR)
14783 if (mode == I387_CW_CEIL)
14788 if (mode == I387_CW_MASK_PM)
14793 gcc_unreachable ();
14796 return I387_CW_ANY;
14799 /* Output code to initialize control word copies used by trunc?f?i and
14800 rounding patterns.  CURRENT_MODE is set to the current control word,
14801 while NEW_MODE is set to the new control word.  */
14804 emit_i387_cw_initialization (int mode)
14806 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14809 enum ix86_stack_slot slot;
14811 rtx reg = gen_reg_rtx (HImode);
14813 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14814 emit_move_insn (reg, copy_rtx (stored_mode));
14816 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14817 || optimize_function_for_size_p (cfun))
14821 case I387_CW_TRUNC:
14822 /* round toward zero (truncate) */
14823 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14824 slot = SLOT_CW_TRUNC;
14827 case I387_CW_FLOOR:
14828 /* round down toward -oo */
14829 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14830 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14831 slot = SLOT_CW_FLOOR;
14835 /* round up toward +oo */
14836 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14837 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14838 slot = SLOT_CW_CEIL;
14841 case I387_CW_MASK_PM:
14842 /* mask precision exception for nearbyint() */
14843 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14844 slot = SLOT_CW_MASK_PM;
14848 gcc_unreachable ();
14855 case I387_CW_TRUNC:
14856 /* round toward zero (truncate) */
14857 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14858 slot = SLOT_CW_TRUNC;
14861 case I387_CW_FLOOR:
14862 /* round down toward -oo */
14863 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14864 slot = SLOT_CW_FLOOR;
14868 /* round up toward +oo */
14869 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14870 slot = SLOT_CW_CEIL;
14873 case I387_CW_MASK_PM:
14874 /* mask precision exception for nearbyint() */
14875 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14876 slot = SLOT_CW_MASK_PM;
14880 gcc_unreachable ();
14884 gcc_assert (slot < MAX_386_STACK_LOCALS);
14886 new_mode = assign_386_stack_local (HImode, slot);
14887 emit_move_insn (new_mode, reg);
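/* For reference: bits 10-11 of the x87 control word select rounding,
   00 = nearest, 01 = down (0x0400), 10 = up (0x0800),
   11 = truncate (0x0c00), and bit 5 (0x0020) masks the precision
   exception, hence the constants or'ed in above.  */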
14890 /* Output code for INSN to convert a float to a signed int. OPERANDS
14891 are the insn operands. The output may be [HSD]Imode and the input
14892 operand may be [SDX]Fmode. */
14895 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
14897 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14898 int dimode_p = GET_MODE (operands[0]) == DImode;
14899 int round_mode = get_attr_i387_cw (insn);
14901 /* Jump through a hoop or two for DImode, since the hardware has no
14902 non-popping instruction. We used to do this a different way, but
14903 that was somewhat fragile and broke with post-reload splitters. */
14904 if ((dimode_p || fisttp) && !stack_top_dies)
14905 output_asm_insn ("fld\t%y1", operands);
14907 gcc_assert (STACK_TOP_P (operands[1]));
14908 gcc_assert (MEM_P (operands[0]));
14909 gcc_assert (GET_MODE (operands[1]) != TFmode);
14912 output_asm_insn ("fisttp%Z0\t%0", operands);
14915 if (round_mode != I387_CW_ANY)
14916 output_asm_insn ("fldcw\t%3", operands);
14917 if (stack_top_dies || dimode_p)
14918 output_asm_insn ("fistp%Z0\t%0", operands);
14920 output_asm_insn ("fist%Z0\t%0", operands);
14921 if (round_mode != I387_CW_ANY)
14922 output_asm_insn ("fldcw\t%2", operands);
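/* So a DImode truncation without FISTTP comes out roughly as

       fldcw   %3       ; load the round-toward-zero control word
       fistpll %0       ; convert, store and pop
       fldcw   %2       ; restore the original control word

   where %2 and %3 are the stack slots prepared by
   emit_i387_cw_initialization.  */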
14928 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14929 have the values zero or one, indicates the ffreep insn's operand
14930 from the OPERANDS array. */
14932 static const char *
14933 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14935 if (TARGET_USE_FFREEP)
14936 #ifdef HAVE_AS_IX86_FFREEP
14937 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14940 static char retval[32];
14941 int regno = REGNO (operands[opno]);
14943 gcc_assert (FP_REGNO_P (regno));
14945 regno -= FIRST_STACK_REG;
14947 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
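/* This emits the raw encoding of "ffreep %st(regno)" (opcode bytes
   DF C0+i) as a little-endian .short, for assemblers that lack the
   ffreep mnemonic.  */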
14952 return opno ? "fstp\t%y1" : "fstp\t%y0";
14956 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14957 should be used. UNORDERED_P is true when fucom should be used. */
14960 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
14962 int stack_top_dies;
14963 rtx cmp_op0, cmp_op1;
14964 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14968 cmp_op0 = operands[0];
14969 cmp_op1 = operands[1];
14973 cmp_op0 = operands[1];
14974 cmp_op1 = operands[2];
14979 if (GET_MODE (operands[0]) == SFmode)
14981 return "%vucomiss\t{%1, %0|%0, %1}";
14983 return "%vcomiss\t{%1, %0|%0, %1}";
14986 return "%vucomisd\t{%1, %0|%0, %1}";
14988 return "%vcomisd\t{%1, %0|%0, %1}";
14991 gcc_assert (STACK_TOP_P (cmp_op0));
14993 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14995 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14997 if (stack_top_dies)
14999 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15000 return output_387_ffreep (operands, 1);
15003 return "ftst\n\tfnstsw\t%0";
15006 if (STACK_REG_P (cmp_op1)
15008 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15009 && REGNO (cmp_op1) != FIRST_STACK_REG)
15011 /* If the top of the 387 stack dies, and the other operand
15012 is also a stack register that dies, then this must be a
15013 `fcompp' float compare.  */
15017 /* There is no double popping fcomi variant. Fortunately,
15018 eflags is immune from the fstp's cc clobbering. */
15020 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15022 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15023 return output_387_ffreep (operands, 0);
15028 return "fucompp\n\tfnstsw\t%0";
15030 return "fcompp\n\tfnstsw\t%0";
15035 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15037 static const char * const alt[16] =
15039 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15040 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15041 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15042 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15044 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15045 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15049 "fcomi\t{%y1, %0|%0, %y1}",
15050 "fcomip\t{%y1, %0|%0, %y1}",
15051 "fucomi\t{%y1, %0|%0, %y1}",
15052 "fucomip\t{%y1, %0|%0, %y1}",
15063 mask = eflags_p << 3;
15064 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15065 mask |= unordered_p << 1;
15066 mask |= stack_top_dies;
15068 gcc_assert (mask < 16);
15077 ix86_output_addr_vec_elt (FILE *file, int value)
15079 const char *directive = ASM_LONG;
15083 directive = ASM_QUAD;
15085 gcc_assert (!TARGET_64BIT);
15088 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15092 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15094 const char *directive = ASM_LONG;
15097 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15098 directive = ASM_QUAD;
15100 gcc_assert (!TARGET_64BIT);
15102 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15103 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15104 fprintf (file, "%s%s%d-%s%d\n",
15105 directive, LPREFIX, value, LPREFIX, rel);
15106 else if (HAVE_AS_GOTOFF_IN_DATA)
15107 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15109 else if (TARGET_MACHO)
15111 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15112 machopic_output_function_base_name (file);
15117 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15118 GOT_SYMBOL_NAME, LPREFIX, value);
15121 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate for the target.  */
15125 ix86_expand_clear (rtx dest)
15129 /* We play register width games, which are only valid after reload. */
15130 gcc_assert (reload_completed);
15132 /* Avoid HImode and its attendant prefix byte. */
15133 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15134 dest = gen_rtx_REG (SImode, REGNO (dest));
15135 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15137 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15138 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15140 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15141 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
15147 /* X is an unchanging MEM. If it is a constant pool reference, return
15148 the constant pool rtx, else NULL. */
15151 maybe_get_pool_constant (rtx x)
15153 x = ix86_delegitimize_address (XEXP (x, 0));
15155 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15156 return get_pool_constant (x);
15162 ix86_expand_move (enum machine_mode mode, rtx operands[])
15165 enum tls_model model;
15170 if (GET_CODE (op1) == SYMBOL_REF)
15172 model = SYMBOL_REF_TLS_MODEL (op1);
15175 op1 = legitimize_tls_address (op1, model, true);
15176 op1 = force_operand (op1, op0);
15179 if (GET_MODE (op1) != mode)
15180 op1 = convert_to_mode (mode, op1, 1);
15182 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15183 && SYMBOL_REF_DLLIMPORT_P (op1))
15184 op1 = legitimize_dllimport_symbol (op1, false);
15186 else if (GET_CODE (op1) == CONST
15187 && GET_CODE (XEXP (op1, 0)) == PLUS
15188 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15190 rtx addend = XEXP (XEXP (op1, 0), 1);
15191 rtx symbol = XEXP (XEXP (op1, 0), 0);
15194 model = SYMBOL_REF_TLS_MODEL (symbol);
15196 tmp = legitimize_tls_address (symbol, model, true);
15197 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15198 && SYMBOL_REF_DLLIMPORT_P (symbol))
15199 tmp = legitimize_dllimport_symbol (symbol, true);
15203 tmp = force_operand (tmp, NULL);
15204 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15205 op0, 1, OPTAB_DIRECT);
15208 if (GET_MODE (tmp) != mode)
15209 op1 = convert_to_mode (mode, tmp, 1);
15213 if ((flag_pic || MACHOPIC_INDIRECT)
15214 && symbolic_operand (op1, mode))
15216 if (TARGET_MACHO && !TARGET_64BIT)
15219 /* dynamic-no-pic */
15220 if (MACHOPIC_INDIRECT)
15222 rtx temp = ((reload_in_progress
15223 || ((op0 && REG_P (op0))
15225 ? op0 : gen_reg_rtx (Pmode));
15226 op1 = machopic_indirect_data_reference (op1, temp);
15228 op1 = machopic_legitimize_pic_address (op1, mode,
15229 temp == op1 ? 0 : temp);
15231 if (op0 != op1 && GET_CODE (op0) != MEM)
15233 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15237 if (GET_CODE (op0) == MEM)
15238 op1 = force_reg (Pmode, op1);
15242 if (GET_CODE (temp) != REG)
15243 temp = gen_reg_rtx (Pmode);
15244 temp = legitimize_pic_address (op1, temp);
15249 /* dynamic-no-pic */
15255 op1 = force_reg (mode, op1);
15256 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15258 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15259 op1 = legitimize_pic_address (op1, reg);
15262 if (GET_MODE (op1) != mode)
15263 op1 = convert_to_mode (mode, op1, 1);
15270 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15271 || !push_operand (op0, mode))
15273 op1 = force_reg (mode, op1);
15275 if (push_operand (op0, mode)
15276 && ! general_no_elim_operand (op1, mode))
15277 op1 = copy_to_mode_reg (mode, op1);
15279 /* Force large constants in 64bit compilation into a register
15280 to get them CSEed.  */
15281 if (can_create_pseudo_p ()
15282 && (mode == DImode) && TARGET_64BIT
15283 && immediate_operand (op1, mode)
15284 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15285 && !register_operand (op0, mode)
15287 op1 = copy_to_mode_reg (mode, op1);
15289 if (can_create_pseudo_p ()
15290 && FLOAT_MODE_P (mode)
15291 && GET_CODE (op1) == CONST_DOUBLE)
15293 /* If we are loading a floating point constant to a register,
15294 force the value to memory now, since we'll get better code
15295 out the back end. */
15297 op1 = validize_mem (force_const_mem (mode, op1));
15298 if (!register_operand (op0, mode))
15300 rtx temp = gen_reg_rtx (mode);
15301 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15302 emit_move_insn (op0, temp);
15308 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15312 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15314 rtx op0 = operands[0], op1 = operands[1];
15315 unsigned int align = GET_MODE_ALIGNMENT (mode);
15317 /* Force constants other than zero into memory.  We do not know how
15318 the instructions used to build constants modify the upper 64 bits
15319 of the register; once we have that information we may be able
15320 to handle some of them more efficiently.  */
15321 if (can_create_pseudo_p ()
15322 && register_operand (op0, mode)
15323 && (CONSTANT_P (op1)
15324 || (GET_CODE (op1) == SUBREG
15325 && CONSTANT_P (SUBREG_REG (op1))))
15326 && !standard_sse_constant_p (op1))
15327 op1 = validize_mem (force_const_mem (mode, op1));
15329 /* We need to check memory alignment for SSE mode since an attribute
15330 can make operands unaligned.  */
15331 if (can_create_pseudo_p ()
15332 && SSE_REG_MODE_P (mode)
15333 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15334 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15338 /* ix86_expand_vector_move_misalign() does not like constants ... */
15339 if (CONSTANT_P (op1)
15340 || (GET_CODE (op1) == SUBREG
15341 && CONSTANT_P (SUBREG_REG (op1))))
15342 op1 = validize_mem (force_const_mem (mode, op1));
15344 /* ... nor both arguments in memory. */
15345 if (!register_operand (op0, mode)
15346 && !register_operand (op1, mode))
15347 op1 = force_reg (mode, op1);
15349 tmp[0] = op0; tmp[1] = op1;
15350 ix86_expand_vector_move_misalign (mode, tmp);
15354 /* Make operand1 a register if it isn't already. */
15355 if (can_create_pseudo_p ()
15356 && !register_operand (op0, mode)
15357 && !register_operand (op1, mode))
15359 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15363 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15366 /* Split 32-byte AVX unaligned load and store if needed. */
15369 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15372 rtx (*extract) (rtx, rtx, rtx);
15373 rtx (*move_unaligned) (rtx, rtx);
15374 enum machine_mode mode;
15376 switch (GET_MODE (op0))
15379 gcc_unreachable ();
15381 extract = gen_avx_vextractf128v32qi;
15382 move_unaligned = gen_avx_movdqu256;
15386 extract = gen_avx_vextractf128v8sf;
15387 move_unaligned = gen_avx_movups256;
15391 extract = gen_avx_vextractf128v4df;
15392 move_unaligned = gen_avx_movupd256;
15397 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15399 rtx r = gen_reg_rtx (mode);
15400 m = adjust_address (op1, mode, 0);
15401 emit_move_insn (r, m);
15402 m = adjust_address (op1, mode, 16);
15403 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15404 emit_move_insn (op0, r);
15406 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15408 m = adjust_address (op0, mode, 0);
15409 emit_insn (extract (m, op1, const0_rtx));
15410 m = adjust_address (op0, mode, 16);
15411 emit_insn (extract (m, op1, const1_rtx));
15414 emit_insn (move_unaligned (op0, op1));
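/* Illustrative model (an addition, not original code): with AVX
   intrinsics, the split store above corresponds to

       _mm_storeu_ps (p, _mm256_castps256_ps128 (v));
       _mm_storeu_ps (p + 4, _mm256_extractf128_ps (v, 1));

   i.e. two 16-byte unaligned stores instead of one 32-byte store; the
   split load builds the 256-bit value back up from two 128-bit halves
   via the VEC_CONCAT above.  */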
15417 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15418 straight to ix86_expand_vector_move. */
15419 /* Code generation for scalar reg-reg moves of single and double precision data:
15420 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15424 if (x86_sse_partial_reg_dependency == true)
15429 Code generation for scalar loads of double precision data:
15430 if (x86_sse_split_regs == true)
15431 movlpd mem, reg (gas syntax)
15435 Code generation for unaligned packed loads of single precision data
15436 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15437 if (x86_sse_unaligned_move_optimal)
15440 if (x86_sse_partial_reg_dependency == true)
15452 Code generation for unaligned packed loads of double precision data
15453 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15454 if (x86_sse_unaligned_move_optimal)
15457 if (x86_sse_split_regs == true)
15470 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15479 switch (GET_MODE_CLASS (mode))
15481 case MODE_VECTOR_INT:
15483 switch (GET_MODE_SIZE (mode))
15486 /* If we're optimizing for size, movups is the smallest. */
15487 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15489 op0 = gen_lowpart (V4SFmode, op0);
15490 op1 = gen_lowpart (V4SFmode, op1);
15491 emit_insn (gen_sse_movups (op0, op1));
15494 op0 = gen_lowpart (V16QImode, op0);
15495 op1 = gen_lowpart (V16QImode, op1);
15496 emit_insn (gen_sse2_movdqu (op0, op1));
15499 op0 = gen_lowpart (V32QImode, op0);
15500 op1 = gen_lowpart (V32QImode, op1);
15501 ix86_avx256_split_vector_move_misalign (op0, op1);
15504 gcc_unreachable ();
15507 case MODE_VECTOR_FLOAT:
15508 op0 = gen_lowpart (mode, op0);
15509 op1 = gen_lowpart (mode, op1);
15514 emit_insn (gen_sse_movups (op0, op1));
15517 ix86_avx256_split_vector_move_misalign (op0, op1);
15520 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15522 op0 = gen_lowpart (V4SFmode, op0);
15523 op1 = gen_lowpart (V4SFmode, op1);
15524 emit_insn (gen_sse_movups (op0, op1));
15527 emit_insn (gen_sse2_movupd (op0, op1));
15530 ix86_avx256_split_vector_move_misalign (op0, op1);
15533 gcc_unreachable ();
15538 gcc_unreachable ();
15546 /* If we're optimizing for size, movups is the smallest. */
15547 if (optimize_insn_for_size_p ()
15548 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15550 op0 = gen_lowpart (V4SFmode, op0);
15551 op1 = gen_lowpart (V4SFmode, op1);
15552 emit_insn (gen_sse_movups (op0, op1));
15556 /* ??? If we have typed data, then it would appear that using
15557 movdqu is the only way to get unaligned data loaded with an integer type.  */
15559 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15561 op0 = gen_lowpart (V16QImode, op0);
15562 op1 = gen_lowpart (V16QImode, op1);
15563 emit_insn (gen_sse2_movdqu (op0, op1));
15567 if (TARGET_SSE2 && mode == V2DFmode)
15571 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15573 op0 = gen_lowpart (V2DFmode, op0);
15574 op1 = gen_lowpart (V2DFmode, op1);
15575 emit_insn (gen_sse2_movupd (op0, op1));
15579 /* When SSE registers are split into halves, we can avoid
15580 writing to the top half twice. */
15581 if (TARGET_SSE_SPLIT_REGS)
15583 emit_clobber (op0);
15588 /* ??? Not sure about the best option for the Intel chips.
15589 The following would seem to satisfy; the register is
15590 entirely cleared, breaking the dependency chain. We
15591 then store to the upper half, with a dependency depth
15592 of one. A rumor has it that Intel recommends two movsd
15593 followed by an unpacklpd, but this is unconfirmed. And
15594 given that the dependency depth of the unpacklpd would
15595 still be one, I'm not sure why this would be better. */
15596 zero = CONST0_RTX (V2DFmode);
15599 m = adjust_address (op1, DFmode, 0);
15600 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15601 m = adjust_address (op1, DFmode, 8);
15602 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15606 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15608 op0 = gen_lowpart (V4SFmode, op0);
15609 op1 = gen_lowpart (V4SFmode, op1);
15610 emit_insn (gen_sse_movups (op0, op1));
15614 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15615 emit_move_insn (op0, CONST0_RTX (mode));
15617 emit_clobber (op0);
15619 if (mode != V4SFmode)
15620 op0 = gen_lowpart (V4SFmode, op0);
15621 m = adjust_address (op1, V2SFmode, 0);
15622 emit_insn (gen_sse_loadlps (op0, op0, m));
15623 m = adjust_address (op1, V2SFmode, 8);
15624 emit_insn (gen_sse_loadhps (op0, op0, m));
15627 else if (MEM_P (op0))
15629 /* If we're optimizing for size, movups is the smallest. */
15630 if (optimize_insn_for_size_p ()
15631 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15633 op0 = gen_lowpart (V4SFmode, op0);
15634 op1 = gen_lowpart (V4SFmode, op1);
15635 emit_insn (gen_sse_movups (op0, op1));
15639 /* ??? Similar to above, only less clear because of quote
15640 typeless stores unquote. */
15641 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15642 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15644 op0 = gen_lowpart (V16QImode, op0);
15645 op1 = gen_lowpart (V16QImode, op1);
15646 emit_insn (gen_sse2_movdqu (op0, op1));
15650 if (TARGET_SSE2 && mode == V2DFmode)
15652 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15654 op0 = gen_lowpart (V2DFmode, op0);
15655 op1 = gen_lowpart (V2DFmode, op1);
15656 emit_insn (gen_sse2_movupd (op0, op1));
15660 m = adjust_address (op0, DFmode, 0);
15661 emit_insn (gen_sse2_storelpd (m, op1));
15662 m = adjust_address (op0, DFmode, 8);
15663 emit_insn (gen_sse2_storehpd (m, op1));
15668 if (mode != V4SFmode)
15669 op1 = gen_lowpart (V4SFmode, op1);
15671 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15673 op0 = gen_lowpart (V4SFmode, op0);
15674 emit_insn (gen_sse_movups (op0, op1));
15678 m = adjust_address (op0, V2SFmode, 0);
15679 emit_insn (gen_sse_storelps (m, op1));
15680 m = adjust_address (op0, V2SFmode, 8);
15681 emit_insn (gen_sse_storehps (m, op1));
15686 gcc_unreachable ();
15689 /* Expand a push in MODE. This is some mode for which we do not support
15690 proper push instructions, at least from the registers that we expect
15691 the value to live in. */
15694 ix86_expand_push (enum machine_mode mode, rtx x)
15698 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15699 GEN_INT (-GET_MODE_SIZE (mode)),
15700 stack_pointer_rtx, 1, OPTAB_DIRECT);
15701 if (tmp != stack_pointer_rtx)
15702 emit_move_insn (stack_pointer_rtx, tmp);
15704 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15706 /* When we push an operand onto the stack, it has to be aligned at least
15707 at the function argument boundary.  However since we don't have
15708 the argument type, we can't determine the actual argument boundary.  */
15710 emit_move_insn (tmp, x);
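/* Example (illustrative): for a mode of size N the expansion above
   is, in effect,

       sub  $N, %esp          (or %rsp)
       mov  the value to (%esp)

   i.e. an explicit stack-pointer adjustment followed by a plain store,
   instead of a real push instruction.  */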
15713 /* Helper function of ix86_fixup_binary_operands to canonicalize
15714 operand order. Returns true if the operands should be swapped. */
15717 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15720 rtx dst = operands[0];
15721 rtx src1 = operands[1];
15722 rtx src2 = operands[2];
15724 /* If the operation is not commutative, we can't do anything. */
15725 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15728 /* Highest priority is that src1 should match dst. */
15729 if (rtx_equal_p (dst, src1))
15731 if (rtx_equal_p (dst, src2))
15734 /* Next highest priority is that immediate constants come second. */
15735 if (immediate_operand (src2, mode))
15737 if (immediate_operand (src1, mode))
15740 /* Lowest priority is that memory references should come second. */
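/* Worked example (illustrative): for dst = src1 + src2 with
   dst == src2, the first rule above swaps the operands so that src1
   matches dst; for dst = $imm + reg, the second rule swaps so the
   immediate comes second, matching the machine's add $imm, reg form.  */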
15750 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15751 destination to use for the operation. If different from the true
15752 destination in operands[0], a copy operation will be required. */
15755 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15758 rtx dst = operands[0];
15759 rtx src1 = operands[1];
15760 rtx src2 = operands[2];
15762 /* Canonicalize operand order. */
15763 if (ix86_swap_binary_operands_p (code, mode, operands))
15767 /* It is invalid to swap operands of different modes. */
15768 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15775 /* Both source operands cannot be in memory. */
15776 if (MEM_P (src1) && MEM_P (src2))
15778 /* Optimization: Only read from memory once. */
15779 if (rtx_equal_p (src1, src2))
15781 src2 = force_reg (mode, src2);
15785 src2 = force_reg (mode, src2);
15788 /* If the destination is memory, and we do not have matching source
15789 operands, do things in registers. */
15790 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15791 dst = gen_reg_rtx (mode);
15793 /* Source 1 cannot be a constant. */
15794 if (CONSTANT_P (src1))
15795 src1 = force_reg (mode, src1);
15797 /* Source 1 cannot be a non-matching memory. */
15798 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15799 src1 = force_reg (mode, src1);
15801 operands[1] = src1;
15802 operands[2] = src2;
15806 /* Similarly, but assume that the destination has already been
15807 set up properly. */
15810 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15811 enum machine_mode mode, rtx operands[])
15813 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15814 gcc_assert (dst == operands[0]);
15817 /* Attempt to expand a binary operator.  Make the expansion closer to the
15818 actual machine than just general_operand, which would allow 3 separate
15819 memory references (one output, two input) in a single insn.  */
15822 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15825 rtx src1, src2, dst, op, clob;
15827 dst = ix86_fixup_binary_operands (code, mode, operands);
15828 src1 = operands[1];
15829 src2 = operands[2];
15831 /* Emit the instruction. */
15833 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15834 if (reload_in_progress)
15836 /* Reload doesn't know about the flags register, and doesn't know that
15837 it doesn't want to clobber it. We can only do this with PLUS. */
15838 gcc_assert (code == PLUS);
15841 else if (reload_completed
15843 && !rtx_equal_p (dst, src1))
15845 /* This is going to be an LEA; avoid splitting it later. */
15850 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15851 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15854 /* Fix up the destination if needed. */
15855 if (dst != operands[0])
15856 emit_move_insn (operands[0], dst);
15859 /* Return TRUE or FALSE depending on whether the binary operator meets the
15860 appropriate constraints. */
15863 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15866 rtx dst = operands[0];
15867 rtx src1 = operands[1];
15868 rtx src2 = operands[2];
15870 /* Both source operands cannot be in memory. */
15871 if (MEM_P (src1) && MEM_P (src2))
15874 /* Canonicalize operand order for commutative operators. */
15875 if (ix86_swap_binary_operands_p (code, mode, operands))
15882 /* If the destination is memory, we must have a matching source operand. */
15883 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15886 /* Source 1 cannot be a constant. */
15887 if (CONSTANT_P (src1))
15890 /* Source 1 cannot be a non-matching memory. */
15891 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15892 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15893 return (code == AND
15896 || (TARGET_64BIT && mode == DImode))
15897 && satisfies_constraint_L (src2));
15902 /* Attempt to expand a unary operator.  Make the expansion closer to the
15903 actual machine than just general_operand, which would allow 2 separate
15904 memory references (one output, one input) in a single insn.  */
15907 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15910 int matching_memory;
15911 rtx src, dst, op, clob;
15916 /* If the destination is memory, and we do not have matching source
15917 operands, do things in registers. */
15918 matching_memory = 0;
15921 if (rtx_equal_p (dst, src))
15922 matching_memory = 1;
15924 dst = gen_reg_rtx (mode);
15927 /* When source operand is memory, destination must match. */
15928 if (MEM_P (src) && !matching_memory)
15929 src = force_reg (mode, src);
15931 /* Emit the instruction. */
15933 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15934 if (reload_in_progress || code == NOT)
15936 /* Reload doesn't know about the flags register, and doesn't know that
15937 it doesn't want to clobber it. */
15938 gcc_assert (code == NOT);
15943 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15944 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15947 /* Fix up the destination if needed. */
15948 if (dst != operands[0])
15949 emit_move_insn (operands[0], dst);
15952 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15953 divisor are within the range [0, 255].  */
15956 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15959 rtx end_label, qimode_label;
15960 rtx insn, div, mod;
15961 rtx scratch, tmp0, tmp1, tmp2;
15962 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15963 rtx (*gen_zero_extend) (rtx, rtx);
15964 rtx (*gen_test_ccno_1) (rtx, rtx);
15969 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15970 gen_test_ccno_1 = gen_testsi_ccno_1;
15971 gen_zero_extend = gen_zero_extendqisi2;
15974 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15975 gen_test_ccno_1 = gen_testdi_ccno_1;
15976 gen_zero_extend = gen_zero_extendqidi2;
15979 gcc_unreachable ();
15982 end_label = gen_label_rtx ();
15983 qimode_label = gen_label_rtx ();
15985 scratch = gen_reg_rtx (mode);
15987 /* Use 8bit unsigned divmod if dividend and divisor are within
15988 the range [0, 255].  */
15989 emit_move_insn (scratch, operands[2]);
15990 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15991 scratch, 1, OPTAB_DIRECT);
15992 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15993 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15994 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15995 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15996 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15998 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15999 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16000 JUMP_LABEL (insn) = qimode_label;
16002 /* Generate original signed/unsigned divmod.  */
16003 div = gen_divmod4_1 (operands[0], operands[1],
16004 operands[2], operands[3]);
16007 /* Branch to the end. */
16008 emit_jump_insn (gen_jump (end_label));
16011 /* Generate 8bit unsigned divide. */
16012 emit_label (qimode_label);
16013 /* Don't use operands[0] for result of 8bit divide since not all
16014 registers support QImode ZERO_EXTRACT. */
16015 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16016 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16017 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16018 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16022 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16023 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16027 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16028 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16031 /* Extract remainder from AH. */
16032 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16033 if (REG_P (operands[1]))
16034 insn = emit_move_insn (operands[1], tmp1);
16037 /* Need a new scratch register since the old one has the result of the 8bit divide.  */
16039 scratch = gen_reg_rtx (mode);
16040 emit_move_insn (scratch, tmp1);
16041 insn = emit_move_insn (operands[1], scratch);
16043 set_unique_reg_note (insn, REG_EQUAL, mod);
16045 /* Zero extend quotient from AL. */
16046 tmp1 = gen_lowpart (QImode, tmp0);
16047 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16048 set_unique_reg_note (insn, REG_EQUAL, div);
16050 emit_label (end_label);
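/* Illustrative C model of the split above (an addition, not original
   GCC code), for the unsigned SImode case:  */

static unsigned int
example_split_udivmod (unsigned int a, unsigned int b, unsigned int *rem)
{
  /* One TEST of (a | b) against ~0xff decides whether the cheap
     8-bit divide suffices; DIV r/m8 then leaves the quotient in AL
     and the remainder in AH, both zero-extended afterwards.  */
  if (((a | b) & ~0xffU) == 0)
    {
      *rem = a % b;	/* taken from AH */
      return a / b;	/* taken from AL */
    }

  /* Otherwise fall back to the full-width divide.  */
  *rem = a % b;
  return a / b;
}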
16053 #define LEA_MAX_STALL (3)
16054 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16056 /* Increase given DISTANCE in half-cycles according to
16057 dependencies between PREV and NEXT instructions.
16058 Add 1 half-cycle if there is no dependency and
16059 go to the next cycle if there is some dependency.  */
16061 static unsigned int
16062 increase_distance (rtx prev, rtx next, unsigned int distance)
16067 if (!prev || !next)
16068 return distance + (distance & 1) + 2;
16070 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
16071 return distance + 1;
16073 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16074 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16075 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
16076 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
16077 return distance + (distance & 1) + 2;
16079 return distance + 1;
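/* Worked example (illustrative): with DISTANCE 3 and a dependency
   between PREV and NEXT, the result is 3 + (3 & 1) + 2 = 6 -- round
   up to the next even (full-cycle) boundary and then skip one more
   cycle; without a dependency the distance grows by one half-cycle.  */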
16082 /* Check whether instruction INSN defines register number
16083 REGNO1 or REGNO2.  */
16086 insn_defines_reg (unsigned int regno1, unsigned int regno2,
16091 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
16092 if (DF_REF_REG_DEF_P (*def_rec)
16093 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16094 && (regno1 == DF_REF_REGNO (*def_rec)
16095 || regno2 == DF_REF_REGNO (*def_rec)))
16103 /* Check whether instruction INSN uses register number
16104 REGNO as part of an address expression.  */
16107 insn_uses_reg_mem (unsigned int regno, rtx insn)
16111 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
16112 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
16118 /* Search backward for a non-agu definition of register number REGNO1
16119 or register number REGNO2 in the basic block, starting from instruction
16120 START up to the head of the basic block or instruction INSN.
16122 Set *FOUND to true if a definition was found
16123 and to false otherwise.
16125 The distance in half-cycles between START and the found instruction or
16126 the head of the BB is added to DISTANCE and returned.  */
16129 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
16130 rtx insn, int distance,
16131 rtx start, bool *found)
16133 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16136 enum attr_type insn_type;
16142 && distance < LEA_SEARCH_THRESHOLD)
16144 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
16146 distance = increase_distance (prev, next, distance);
16147 if (insn_defines_reg (regno1, regno2, prev))
16149 insn_type = get_attr_type (prev);
16150 if (insn_type != TYPE_LEA)
16159 if (prev == BB_HEAD (bb))
16162 prev = PREV_INSN (prev);
16168 /* Search backward for a non-agu definition of register number REGNO1
16169 or register number REGNO2 in INSN's basic block until we
16170 1. pass LEA_SEARCH_THRESHOLD instructions, or
16171 2. reach a neighboring BB boundary, or
16172 3. reach an agu definition.
16173 Returns the distance between the non-agu definition point and INSN.
16174 If there is no definition point, returns -1.  */
16177 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16180 basic_block bb = BLOCK_FOR_INSN (insn);
16182 bool found = false;
16184 if (insn != BB_HEAD (bb))
16185 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16186 distance, PREV_INSN (insn),
16189 if (!found && distance < LEA_SEARCH_THRESHOLD)
16193 bool simple_loop = false;
16195 FOR_EACH_EDGE (e, ei, bb->preds)
16198 simple_loop = true;
16203 distance = distance_non_agu_define_in_bb (regno1, regno2,
16205 BB_END (bb), &found);
16208 int shortest_dist = -1;
16209 bool found_in_bb = false;
16211 FOR_EACH_EDGE (e, ei, bb->preds)
16214 = distance_non_agu_define_in_bb (regno1, regno2,
16220 if (shortest_dist < 0)
16221 shortest_dist = bb_dist;
16222 else if (bb_dist > 0)
16223 shortest_dist = MIN (bb_dist, shortest_dist);
16229 distance = shortest_dist;
16233 /* get_attr_type may modify recog data. We want to make sure
16234 that recog data is valid for instruction INSN, on which
16235 distance_non_agu_define is called. INSN is unchanged here. */
16236 extract_insn_cached (insn);
16241 return distance >> 1;
16244 /* Return the distance in half-cycles between INSN and the next
16245 insn that uses register number REGNO in a memory address, added
16246 to DISTANCE.  Return -1 if REGNO0 is set.
16248 Set *FOUND to true if a register use was found and to false otherwise.
16250 Set *REDEFINED to true if a register redefinition was
16251 found and to false otherwise.  */
16254 distance_agu_use_in_bb (unsigned int regno,
16255 rtx insn, int distance, rtx start,
16256 bool *found, bool *redefined)
16258 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16263 *redefined = false;
16267 && distance < LEA_SEARCH_THRESHOLD)
16269 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
16271 distance = increase_distance (prev, next, distance);
16272 if (insn_uses_reg_mem (regno, next))
16274 /* Return DISTANCE if OP0 is used in memory
16275 address in NEXT. */
16280 if (insn_defines_reg (regno, INVALID_REGNUM, next))
16282 /* Return -1 if OP0 is set in NEXT. */
16290 if (next == BB_END (bb))
16293 next = NEXT_INSN (next);
16299 /* Return the distance between INSN and the next insn that uses
16300 register number REGNO0 in a memory address.  Return -1 if no such
16301 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
16304 distance_agu_use (unsigned int regno0, rtx insn)
16306 basic_block bb = BLOCK_FOR_INSN (insn);
16308 bool found = false;
16309 bool redefined = false;
16311 if (insn != BB_END (bb))
16312 distance = distance_agu_use_in_bb (regno0, insn, distance,
16314 &found, &redefined);
16316 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16320 bool simple_loop = false;
16322 FOR_EACH_EDGE (e, ei, bb->succs)
16325 simple_loop = true;
16330 distance = distance_agu_use_in_bb (regno0, insn,
16331 distance, BB_HEAD (bb),
16332 &found, &redefined);
16335 int shortest_dist = -1;
16336 bool found_in_bb = false;
16337 bool redefined_in_bb = false;
16339 FOR_EACH_EDGE (e, ei, bb->succs)
16342 = distance_agu_use_in_bb (regno0, insn,
16343 distance, BB_HEAD (e->dest),
16344 &found_in_bb, &redefined_in_bb);
16347 if (shortest_dist < 0)
16348 shortest_dist = bb_dist;
16349 else if (bb_dist > 0)
16350 shortest_dist = MIN (bb_dist, shortest_dist);
16356 distance = shortest_dist;
16360 if (!found || redefined)
16363 return distance >> 1;
16366 /* Define this macro to tune LEA priority vs ADD; it takes effect when
16367 there is a dilemma between choosing LEA or ADD.
16368 Negative value: ADD is preferred over LEA.
16370 Positive value: LEA is preferred over ADD.  */
16371 #define IX86_LEA_PRIORITY 0
16373 /* Return true if using lea INSN has a performance advantage
16374 over a sequence of instructions.  The instruction sequence has
16375 SPLIT_COST cycles higher latency than the lea latency.  */
16378 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
16379 unsigned int regno2, unsigned int split_cost)
16381 int dist_define, dist_use;
16383 dist_define = distance_non_agu_define (regno1, regno2, insn);
16384 dist_use = distance_agu_use (regno0, insn);
16386 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
16388 /* If there is no non-AGU operand definition, no AGU
16389 operand usage and the split cost is 0, then both the lea
16390 and non-lea variants have the same priority.  Currently
16391 we prefer lea for 64-bit code and non-lea on 32-bit code.  */
16394 return TARGET_64BIT || IX86_LEA_PRIORITY;
16399 /* With a longer definition distance, lea is preferable.
16400 Here we adjust it to take into account the splitting cost and
16402 lea priority.  */
16404 /* If there is no use in a memory address then we just check
16405 that the split cost does not exceed the AGU stall.  */
16407 return dist_define >= LEA_MAX_STALL;
16409 /* If this insn has both backward non-agu dependence and forward
16410 agu dependence, the one with short distance takes effect. */
16411 return dist_define >= dist_use;
16414 /* Return true if it is legal for INSN to clobber flags,
16415 and false otherwise.  */
16418 ix86_ok_to_clobber_flags (rtx insn)
16420 basic_block bb = BLOCK_FOR_INSN (insn);
16426 if (NONDEBUG_INSN_P (insn))
16428 for (use = DF_INSN_USES (insn); *use; use++)
16429 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
16432 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
16436 if (insn == BB_END (bb))
16439 insn = NEXT_INSN (insn);
16442 live = df_get_live_out (bb);
16443 return !REGNO_REG_SET_P (live, FLAGS_REG);
16446 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16447 move and add to avoid AGU stalls. */
16450 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
16452 unsigned int regno0 = true_regnum (operands[0]);
16453 unsigned int regno1 = true_regnum (operands[1]);
16454 unsigned int regno2 = true_regnum (operands[2]);
16456 /* Check if we need to optimize. */
16457 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16460 /* Check that it is correct to split here.  */
16461 if (!ix86_ok_to_clobber_flags (insn))
16464 /* We only need to split adds with a non-destructive
16465 destination operand.  */
16466 if (regno0 == regno1 || regno0 == regno2)
16469 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
16472 /* Return true if we need to split lea into a sequence of
16473 instructions to avoid AGU stalls. */
16476 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
16478 unsigned int regno0 = true_regnum (operands[0]) ;
16479 unsigned int regno1 = -1;
16480 unsigned int regno2 = -1;
16481 unsigned int split_cost = 0;
16482 struct ix86_address parts;
16485 /* Check whether we need to optimize.  */
16486 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16489 /* Check that it is correct to split here.  */
16490 if (!ix86_ok_to_clobber_flags (insn))
16493 ok = ix86_decompose_address (operands[1], &parts);
16496 /* We should not split into an add if a non-legitimate pic
16497 operand is used as the displacement.  */
16498 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16502 regno1 = true_regnum (parts.base);
16504 regno2 = true_regnum (parts.index);
16506 /* Compute how many cycles we will add to execution time
16507 if we split the lea into a sequence of instructions.  */
16508 if (parts.base || parts.index)
16510 /* Have to use a mov instruction if the non-destructive
16511 destination form is used.  */
16512 if (regno1 != regno0 && regno2 != regno0)
16515 /* Have to add index to base if both exist. */
16516 if (parts.base && parts.index)
16519 /* Have to use shift and adds if scale is 2 or greater. */
16520 if (parts.scale > 1)
16522 if (regno0 != regno1)
16524 else if (regno2 == regno0)
16527 split_cost += parts.scale;
16530 /* Have to use an add instruction with an immediate if
16531 disp is nonzero.  */
16532 if (parts.disp && parts.disp != const0_rtx)
16535 /* Subtract the price of lea. */
16539 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
16542 /* Emit x86 binary operand CODE in mode MODE, where the first operand
16543 matches destination. RTX includes clobber of FLAGS_REG. */
16546 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
16551 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
16552 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16554 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16557 /* Split lea instructions into a sequence of instructions
16558 which are executed on the ALU to avoid AGU stalls.
16559 It is assumed that clobbering the flags register
16560 at the lea position is allowed.  */
16563 ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
16565 unsigned int regno0 = true_regnum (operands[0]) ;
16566 unsigned int regno1 = INVALID_REGNUM;
16567 unsigned int regno2 = INVALID_REGNUM;
16568 struct ix86_address parts;
16572 ok = ix86_decompose_address (operands[1], &parts);
16577 if (GET_MODE (parts.base) != mode)
16578 parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
16579 regno1 = true_regnum (parts.base);
16584 if (GET_MODE (parts.index) != mode)
16585 parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
16586 regno2 = true_regnum (parts.index);
16589 if (parts.scale > 1)
16591 /* Case r1 = r1 + ... */
16592 if (regno1 == regno0)
16594 /* If we have the case r1 = r1 + C * r1 then we
16595 should use multiplication, which is very
16596 expensive.  Assume the cost model is wrong if we
16597 have such a case here.  */
16598 gcc_assert (regno2 != regno0);
16600 for (adds = parts.scale; adds > 0; adds--)
16601 ix86_emit_binop (PLUS, mode, operands[0], parts.index);
16605 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
16606 if (regno0 != regno2)
16607 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
16609 /* Use shift for scaling. */
16610 ix86_emit_binop (ASHIFT, mode, operands[0],
16611 GEN_INT (exact_log2 (parts.scale)));
16614 ix86_emit_binop (PLUS, mode, operands[0], parts.base);
16616 if (parts.disp && parts.disp != const0_rtx)
16617 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
16620 else if (!parts.base && !parts.index)
16622 gcc_assert (parts.disp);
16623 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
16629 if (regno0 != regno2)
16630 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
16632 else if (!parts.index)
16634 if (regno0 != regno1)
16635 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
16639 if (regno0 == regno1)
16641 else if (regno0 == regno2)
16645 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
16649 ix86_emit_binop (PLUS, mode, operands[0], tmp);
16652 if (parts.disp && parts.disp != const0_rtx)
16653 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
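/* Example (illustrative): on a 64-bit target the split above turns

       lea  0x4(%rbx,%rcx,2), %rax

   into the ALU-only sequence

       mov  %rcx, %rax        ; copy index, regno0 != regno2
       shl  $1, %rax          ; scale 2 == 1 << 1
       add  %rbx, %rax        ; add base
       add  $0x4, %rax        ; add displacement  */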
16657 /* Return true if it is ok to optimize an ADD operation to a LEA
16658 operation to avoid flag register consumption.  For most processors,
16659 ADD is faster than LEA.  For processors like Atom, if the
16660 destination register of the LEA holds an actual address which will be
16661 used soon, LEA is better; otherwise ADD is better.  */
16664 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16666 unsigned int regno0 = true_regnum (operands[0]);
16667 unsigned int regno1 = true_regnum (operands[1]);
16668 unsigned int regno2 = true_regnum (operands[2]);
16670 /* If a = b + c (a != b && a != c), we must use the lea form.  */
16671 if (regno0 != regno1 && regno0 != regno2)
16674 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16677 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
16680 /* Return true if the destination reg of SET_BODY is the shift count of USE_BODY.  */
16684 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16690 /* Retrieve destination of SET_BODY. */
16691 switch (GET_CODE (set_body))
16694 set_dest = SET_DEST (set_body);
16695 if (!set_dest || !REG_P (set_dest))
16699 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16700 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16708 /* Retrieve shift count of USE_BODY. */
16709 switch (GET_CODE (use_body))
16712 shift_rtx = XEXP (use_body, 1);
16715 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16716 if (ix86_dep_by_shift_count_body (set_body,
16717 XVECEXP (use_body, 0, i)))
16725 && (GET_CODE (shift_rtx) == ASHIFT
16726 || GET_CODE (shift_rtx) == LSHIFTRT
16727 || GET_CODE (shift_rtx) == ASHIFTRT
16728 || GET_CODE (shift_rtx) == ROTATE
16729 || GET_CODE (shift_rtx) == ROTATERT))
16731 rtx shift_count = XEXP (shift_rtx, 1);
16733 /* Return true if shift count is dest of SET_BODY. */
16734 if (REG_P (shift_count)
16735 && true_regnum (set_dest) == true_regnum (shift_count))
16742 /* Return true if the destination reg of SET_INSN is the shift count of USE_INSN.  */
16746 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16748 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16749 PATTERN (use_insn));
16752 /* Return TRUE or FALSE depending on whether the unary operator meets the
16753 appropriate constraints. */
16756 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16757 enum machine_mode mode ATTRIBUTE_UNUSED,
16758 rtx operands[2] ATTRIBUTE_UNUSED)
16760 /* If one of the operands is memory, source and destination must match.  */
16761 if ((MEM_P (operands[0])
16762 || MEM_P (operands[1]))
16763 && ! rtx_equal_p (operands[0], operands[1]))
16768 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16769 are ok, keeping in mind the possible movddup alternative. */
16772 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16774 if (MEM_P (operands[0]))
16775 return rtx_equal_p (operands[0], operands[1 + high]);
16776 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16777 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16781 /* Post-reload splitter for converting an SF or DFmode value in an
16782 SSE register into an unsigned SImode. */
16785 ix86_split_convert_uns_si_sse (rtx operands[])
16787 enum machine_mode vecmode;
16788 rtx value, large, zero_or_two31, input, two31, x;
16790 large = operands[1];
16791 zero_or_two31 = operands[2];
16792 input = operands[3];
16793 two31 = operands[4];
16794 vecmode = GET_MODE (large);
16795 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16797 /* Load up the value into the low element. We must ensure that the other
16798 elements are valid floats -- zero is the easiest such value. */
16801 if (vecmode == V4SFmode)
16802 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16804 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16808 input = gen_rtx_REG (vecmode, REGNO (input));
16809 emit_move_insn (value, CONST0_RTX (vecmode));
16810 if (vecmode == V4SFmode)
16811 emit_insn (gen_sse_movss (value, value, input));
16813 emit_insn (gen_sse2_movsd (value, value, input));
16816 emit_move_insn (large, two31);
16817 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16819 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16820 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16822 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16823 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16825 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16826 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16828 large = gen_rtx_REG (V4SImode, REGNO (large));
16829 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16831 x = gen_rtx_REG (V4SImode, REGNO (value));
16832 if (vecmode == V4SFmode)
16833 emit_insn (gen_sse2_cvttps2dq (x, value));
16835 emit_insn (gen_sse2_cvttpd2dq (x, value));
16838 emit_insn (gen_xorv4si3 (value, value, large));
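/* Illustrative scalar model of the branchless sequence above (an
   addition, not original GCC code):  */

static unsigned int
example_sf_to_uns (float value)
{
  /* The vector code computes the "2^31 <= value" mask with a compare,
     conditionally subtracts 2^31, truncates, then flips the sign bit
     back in with an XOR.  */
  if (value >= 0x1p31f)
    return (unsigned int) (int) (value - 0x1p31f) ^ 0x80000000u;
  return (unsigned int) (int) value;
}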
16841 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16842 Expects the 64-bit DImode to be supplied in a pair of integral
16843 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16844 -mfpmath=sse, !optimize_size only. */
16847 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16849 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16850 rtx int_xmm, fp_xmm;
16851 rtx biases, exponents;
16854 int_xmm = gen_reg_rtx (V4SImode);
16855 if (TARGET_INTER_UNIT_MOVES)
16856 emit_insn (gen_movdi_to_sse (int_xmm, input));
16857 else if (TARGET_SSE_SPLIT_REGS)
16859 emit_clobber (int_xmm);
16860 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16864 x = gen_reg_rtx (V2DImode);
16865 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16866 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16869 x = gen_rtx_CONST_VECTOR (V4SImode,
16870 gen_rtvec (4, GEN_INT (0x43300000UL),
16871 GEN_INT (0x45300000UL),
16872 const0_rtx, const0_rtx));
16873 exponents = validize_mem (force_const_mem (V4SImode, x));
16875 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16876 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16878 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16879 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16880 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16881 (0x1.0p84 + double(fp_value_hi_xmm)).
16882 Note these exponents differ by 32. */
16884 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16886 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16887 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16888 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16889 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16890 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16891 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16892 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16893 biases = validize_mem (force_const_mem (V2DFmode, biases));
16894 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16896 /* Add the upper and lower DFmode values together. */
16898 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16901 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16902 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16903 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16906 ix86_expand_vector_extract (false, target, fp_xmm, 0);
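/* Illustrative scalar model of the bias trick above (an addition,
   not original GCC code), assuming little-endian IEEE doubles:  */

static double
example_uns64_to_double (unsigned long long x)
{
  union { unsigned long long u; double d; } lo, hi;

  /* 0x43300000:low32 is 2^52 + low32; 0x45300000:high32 is
     2^84 + high32 * 2^32 -- exactly the interleave built above.  */
  lo.u = (0x43300000ULL << 32) | (x & 0xffffffffULL);
  hi.u = (0x45300000ULL << 32) | (x >> 32);

  /* Subtract the 2^52 and 2^84 biases and sum the two halves.  */
  return (hi.d - 0x1.0p84) + (lo.d - 0x1.0p52);
}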
16909 /* Not used, but eases macroization of patterns. */
16911 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16912 rtx input ATTRIBUTE_UNUSED)
16914 gcc_unreachable ();
16917 /* Convert an unsigned SImode value into a DFmode. Only currently used
16918 for SSE, but applicable anywhere. */
16921 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16923 REAL_VALUE_TYPE TWO31r;
16926 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16927 NULL, 1, OPTAB_DIRECT);
16929 fp = gen_reg_rtx (DFmode);
16930 emit_insn (gen_floatsidf2 (fp, x));
16932 real_ldexp (&TWO31r, &dconst1, 31);
16933 x = const_double_from_real_value (TWO31r, DFmode);
16935 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16937 emit_move_insn (target, x);
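/* Illustrative scalar model of the sequence above (an addition, not
   original GCC code; two's-complement wraparound assumed):  */

static double
example_uns32_to_double (unsigned int x)
{
  /* Bias into the signed range, use the signed int->double convert,
     then add 2^31 back in floating point.  */
  int biased = (int) (x + 0x80000000u);
  return (double) biased + 2147483648.0;
}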
16940 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16941 32-bit mode; otherwise we have a direct convert instruction. */
16944 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16946 REAL_VALUE_TYPE TWO32r;
16947 rtx fp_lo, fp_hi, x;
16949 fp_lo = gen_reg_rtx (DFmode);
16950 fp_hi = gen_reg_rtx (DFmode);
16952 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16954 real_ldexp (&TWO32r, &dconst1, 32);
16955 x = const_double_from_real_value (TWO32r, DFmode);
16956 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16958 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16960 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16963 emit_move_insn (target, x);
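/* Illustrative scalar model of the sequence above (an addition, not
   original GCC code; arithmetic right shift assumed):  */

static double
example_sdi_to_double (long long x)
{
  double fp_hi = (double) (int) (x >> 32);	/* signed high word */
  double fp_lo = (double) (unsigned int) x;	/* unsigned low word */

  /* fp_hi * 2^32 + fp_lo, as emitted above.  */
  return fp_hi * 0x1.0p32 + fp_lo;
}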
16966 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16967 For x86_32, -mfpmath=sse, !optimize_size only. */
16969 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16971 REAL_VALUE_TYPE ONE16r;
16972 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16974 real_ldexp (&ONE16r, &dconst1, 16);
16975 x = const_double_from_real_value (ONE16r, SFmode);
16976 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16977 NULL, 0, OPTAB_DIRECT);
16978 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16979 NULL, 0, OPTAB_DIRECT);
16980 fp_hi = gen_reg_rtx (SFmode);
16981 fp_lo = gen_reg_rtx (SFmode);
16982 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16983 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16984 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16986 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16988 if (!rtx_equal_p (target, fp_hi))
16989 emit_move_insn (target, fp_hi);
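/* Illustrative scalar model of the 16/16 split above (an addition,
   not original GCC code):  */

static float
example_uns32_to_float (unsigned int x)
{
  /* Both halves fit in a signed int, so the plain int->float convert
     is safe; recombine as hi * 2^16 + lo.  */
  float fp_hi = (float) (int) (x >> 16);
  float fp_lo = (float) (int) (x & 0xffff);
  return fp_hi * 65536.0f + fp_lo;
}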
16992 /* A subroutine of ix86_build_signbit_mask.  If VECT is true,
16993 then replicate the value for all elements of the vector register.  */
16997 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
17001 enum machine_mode scalar_mode;
17014 n_elt = GET_MODE_NUNITS (mode);
17015 v = rtvec_alloc (n_elt);
17016 scalar_mode = GET_MODE_INNER (mode);
17018 RTVEC_ELT (v, 0) = value;
17020 for (i = 1; i < n_elt; ++i)
17021 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
17023 return gen_rtx_CONST_VECTOR (mode, v);
17026 gcc_unreachable ();
17030 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17031 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17032 for an SSE register. If VECT is true, then replicate the mask for
17033 all elements of the vector register. If INVERT is true, then create
17034 a mask excluding the sign bit. */
17037 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
17039 enum machine_mode vec_mode, imode;
17040 HOST_WIDE_INT hi, lo;
17045 /* Find the sign bit, sign extended to 2*HWI. */
17053 mode = GET_MODE_INNER (mode);
17055 lo = 0x80000000, hi = lo < 0;
17063 mode = GET_MODE_INNER (mode);
17065 if (HOST_BITS_PER_WIDE_INT >= 64)
17066 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
17068 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17073 vec_mode = VOIDmode;
17074 if (HOST_BITS_PER_WIDE_INT >= 64)
17077 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
17084 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17088 lo = ~lo, hi = ~hi;
17094 mask = immed_double_const (lo, hi, imode);
17096 vec = gen_rtvec (2, v, mask);
17097 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
17098 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
17105 gcc_unreachable ();
17109 lo = ~lo, hi = ~hi;
17111 /* Force this value into the low part of a fp vector constant. */
17112 mask = immed_double_const (lo, hi, imode);
17113 mask = gen_lowpart (mode, mask);
17115 if (vec_mode == VOIDmode)
17116 return force_reg (mode, mask);
17118 v = ix86_build_const_vector (vec_mode, vect, mask);
17119 return force_reg (vec_mode, v);
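/* Worked example (illustrative): for DFmode the sign bit is bit 63,
   so the mask built above is 0x8000000000000000 (i.e. -0.0),
   replicated across the vector when VECT; with INVERT it becomes
   0x7fffffffffffffff, everything but the sign bit, as used for ABS.  */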
17122 /* Generate code for floating point ABS or NEG. */
17125 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
17128 rtx mask, set, dst, src;
17129 bool use_sse = false;
17130 bool vector_mode = VECTOR_MODE_P (mode);
17131 enum machine_mode vmode = mode;
17135 else if (mode == TFmode)
17137 else if (TARGET_SSE_MATH)
17139 use_sse = SSE_FLOAT_MODE_P (mode);
17140 if (mode == SFmode)
17142 else if (mode == DFmode)
17146 /* NEG and ABS performed with SSE use bitwise mask operations.
17147 Create the appropriate mask now. */
17149 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
17156 set = gen_rtx_fmt_e (code, mode, src);
17157 set = gen_rtx_SET (VOIDmode, dst, set);
17164 use = gen_rtx_USE (VOIDmode, mask);
17166 par = gen_rtvec (2, set, use);
17169 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17170 par = gen_rtvec (3, set, use, clob);
17172 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
17178 /* Expand a copysign operation. Special case operand 0 being a constant. */
17181 ix86_expand_copysign (rtx operands[])
17183 enum machine_mode mode, vmode;
17184 rtx dest, op0, op1, mask, nmask;
17186 dest = operands[0];
17190 mode = GET_MODE (dest);
17192 if (mode == SFmode)
17194 else if (mode == DFmode)
17199 if (GET_CODE (op0) == CONST_DOUBLE)
17201 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
17203 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17204 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17206 if (mode == SFmode || mode == DFmode)
17208 if (op0 == CONST0_RTX (mode))
17209 op0 = CONST0_RTX (vmode);
17212 rtx v = ix86_build_const_vector (vmode, false, op0);
17214 op0 = force_reg (vmode, v);
17217 else if (op0 != CONST0_RTX (mode))
17218 op0 = force_reg (mode, op0);
17220 mask = ix86_build_signbit_mask (vmode, 0, 0);
17222 if (mode == SFmode)
17223 copysign_insn = gen_copysignsf3_const;
17224 else if (mode == DFmode)
17225 copysign_insn = gen_copysigndf3_const;
17227 copysign_insn = gen_copysigntf3_const;
17229 emit_insn (copysign_insn (dest, op0, op1, mask));
17233 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17235 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17236 mask = ix86_build_signbit_mask (vmode, 0, 0);
17238 if (mode == SFmode)
17239 copysign_insn = gen_copysignsf3_var;
17240 else if (mode == DFmode)
17241 copysign_insn = gen_copysigndf3_var;
17243 copysign_insn = gen_copysigntf3_var;
17245 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17249 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17250 be a constant, and so has already been expanded into a vector constant. */
17253 ix86_split_copysign_const (rtx operands[])
17255 enum machine_mode mode, vmode;
17256 rtx dest, op0, mask, x;
17258 dest = operands[0];
17260 mask = operands[3];
17262 mode = GET_MODE (dest);
17263 vmode = GET_MODE (mask);
17265 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17266 x = gen_rtx_AND (vmode, dest, mask);
17267 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17269 if (op0 != CONST0_RTX (vmode))
17271 x = gen_rtx_IOR (vmode, dest, op0);
17272 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17276 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17277 so we have to do two masks. */
17280 ix86_split_copysign_var (rtx operands[])
17282 enum machine_mode mode, vmode;
17283 rtx dest, scratch, op0, op1, mask, nmask, x;
17285 dest = operands[0];
17286 scratch = operands[1];
17289 nmask = operands[4];
17290 mask = operands[5];
17292 mode = GET_MODE (dest);
17293 vmode = GET_MODE (mask);
17295 if (rtx_equal_p (op0, op1))
17297 /* Shouldn't happen often (it's useless, obviously), but when it does
17298 we'd generate incorrect code if we continue below. */
17299 emit_move_insn (dest, op0);
17303 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17305 gcc_assert (REGNO (op1) == REGNO (scratch));
17307 x = gen_rtx_AND (vmode, scratch, mask);
17308 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17311 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17312 x = gen_rtx_NOT (vmode, dest);
17313 x = gen_rtx_AND (vmode, x, op0);
17314 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17318 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17320 x = gen_rtx_AND (vmode, scratch, mask);
17322 else /* alternative 2,4 */
17324 gcc_assert (REGNO (mask) == REGNO (scratch));
17325 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17326 x = gen_rtx_AND (vmode, scratch, op1);
17328 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17330 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17332 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17333 x = gen_rtx_AND (vmode, dest, nmask);
17335 else /* alternative 3,4 */
17337 gcc_assert (REGNO (nmask) == REGNO (dest));
17339 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17340 x = gen_rtx_AND (vmode, dest, op0);
17342 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17345 x = gen_rtx_IOR (vmode, dest, scratch);
17346 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17349 /* Return TRUE or FALSE depending on whether the first SET in INSN
17350 has source and destination with matching CC modes, and that the
17351 CC mode is at least as constrained as REQ_MODE. */
17354 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17357 enum machine_mode set_mode;
17359 set = PATTERN (insn);
17360 if (GET_CODE (set) == PARALLEL)
17361 set = XVECEXP (set, 0, 0);
17362 gcc_assert (GET_CODE (set) == SET);
17363 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17365 set_mode = GET_MODE (SET_DEST (set));
17369 if (req_mode != CCNOmode
17370 && (req_mode != CCmode
17371 || XEXP (SET_SRC (set), 1) != const0_rtx))
17375 if (req_mode == CCGCmode)
17379 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17383 if (req_mode == CCZmode)
17393 if (set_mode != req_mode)
17398 gcc_unreachable ();
17401 return GET_MODE (SET_SRC (set)) == set_mode;
17404 /* Generate insn patterns to do an integer compare of OPERANDS. */
17407 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17409 enum machine_mode cmpmode;
17412 cmpmode = SELECT_CC_MODE (code, op0, op1);
17413 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17415 /* This is very simple, but making the interface the same as in the
17416 FP case makes the rest of the code easier. */
17417 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17418 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17420 /* Return the test that should be put into the flags user, i.e.
17421 the bcc, scc, or cmov instruction. */
17422 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17425 /* Figure out whether to use ordered or unordered fp comparisons.
17426 Return the appropriate mode to use. */
17429 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17431 /* ??? In order to make all comparisons reversible, we do all comparisons
17432 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
17433 all forms of trapping and nontrapping comparisons, we can make inequality
17434 comparisons trapping again, since it results in better code when using
17435 FCOM based compares.  */
17436 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17440 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17442 enum machine_mode mode = GET_MODE (op0);
17444 if (SCALAR_FLOAT_MODE_P (mode))
17446 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17447 return ix86_fp_compare_mode (code);
17452 /* Only zero flag is needed. */
17453 case EQ: /* ZF=0 */
17454 case NE: /* ZF!=0 */
17456 /* Codes needing carry flag. */
17457 case GEU: /* CF=0 */
17458 case LTU: /* CF=1 */
17459 /* Detect overflow checks. They need just the carry flag. */
17460 if (GET_CODE (op0) == PLUS
17461 && rtx_equal_p (op1, XEXP (op0, 0)))
17465 case GTU: /* CF=0 & ZF=0 */
17466 case LEU: /* CF=1 | ZF=1 */
17467 /* Detect overflow checks. They need just the carry flag. */
17468 if (GET_CODE (op0) == MINUS
17469 && rtx_equal_p (op1, XEXP (op0, 0)))
17473 /* Codes possibly doable only with sign flag when
17474 comparing against zero. */
17475 case GE: /* SF=OF or SF=0 */
17476 case LT: /* SF<>OF or SF=1 */
17477 if (op1 == const0_rtx)
17480 /* For other cases Carry flag is not required. */
17482 /* Codes doable only with sign flag when comparing
17483 against zero, but we miss jump instruction for it
17484 so we need to use relational tests against overflow
17485 that thus needs to be zero. */
17486 case GT: /* ZF=0 & SF=OF */
17487 case LE: /* ZF=1 | SF<>OF */
17488 if (op1 == const0_rtx)
17492 /* The strcmp patterns do a (use flags), and combine may ask us for the proper mode.  */
17497 gcc_unreachable ();
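/* Added examples of the mapping above: "a == b" needs only ZF and
   yields CCZmode; "a < 0" tests just the sign bit and yields
   CCGOCmode; a general signed "a < b" needs the full complement of
   flags and yields CCGCmode.  */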
17501 /* Return the fixed registers used for condition codes. */
17504 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17511 /* If two condition code modes are compatible, return a condition code
17512 mode which is compatible with both.  Otherwise, return VOIDmode.  */
17515 static enum machine_mode
17516 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17521 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17524 if ((m1 == CCGCmode && m2 == CCGOCmode)
17525 || (m1 == CCGOCmode && m2 == CCGCmode))
17531 gcc_unreachable ();
17561 /* These are only compatible with themselves, which we already checked above.  */
17568 /* Return a comparison we can do that is equivalent to
17569 swap_condition (code), apart possibly from orderedness.
17570 But never change orderedness if TARGET_IEEE_FP, returning
17571 UNKNOWN in that case if necessary.  */
17573 static enum rtx_code
17574 ix86_fp_swap_condition (enum rtx_code code)
17578 case GT: /* GTU - CF=0 & ZF=0 */
17579 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17580 case GE: /* GEU - CF=0 */
17581 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17582 case UNLT: /* LTU - CF=1 */
17583 return TARGET_IEEE_FP ? UNKNOWN : GT;
17584 case UNLE: /* LEU - CF=1 | ZF=1 */
17585 return TARGET_IEEE_FP ? UNKNOWN : GE;
17587 return swap_condition (code);
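/* Added example: swapping the operands of "a > b" yields UNLT here,
   i.e. "b UNLT a", which agrees with "b < a" except when the operands
   are unordered: the x87 "below" condition (CF=1) includes the
   unordered case.  Under TARGET_IEEE_FP that difference is not
   acceptable, so UNKNOWN is returned instead.  */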
17591 /* Return the cost of comparison CODE using the best strategy for performance.
17592 All following functions use the number of instructions as the cost metric.
17593 In the future this should be tweaked to compute bytes for optimize_size and
17594 take into account the performance of various instructions on various CPUs.  */
17597 ix86_fp_comparison_cost (enum rtx_code code)
17601 /* The cost of code using bit-twiddling on %ah. */
17618 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17622 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17625 gcc_unreachable ();
17628 switch (ix86_fp_comparison_strategy (code))
17630 case IX86_FPCMP_COMI:
17631 return arith_cost > 4 ? 3 : 2;
17632 case IX86_FPCMP_SAHF:
17633 return arith_cost > 4 ? 4 : 3;
17639 /* Return the strategy to use for a floating-point compare.  We assume that fcomi
17640 is always preferable where available, since that is also true when looking at
17641 size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
17643 enum ix86_fpcmp_strategy
17644 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17646 /* Do fcomi/sahf based test when profitable. */
17649 return IX86_FPCMP_COMI;
17651 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17652 return IX86_FPCMP_SAHF;
17654 return IX86_FPCMP_ARITH;
17657 /* Swap, force into registers, or otherwise massage the two operands
17658 to an fp comparison.  The operands are updated in place; the new
17659 comparison code is returned.  */
17661 static enum rtx_code
17662 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17664 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17665 rtx op0 = *pop0, op1 = *pop1;
17666 enum machine_mode op_mode = GET_MODE (op0);
17667 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17669 /* All of the unordered compare instructions only work on registers.
17670 The same is true of the fcomi compare instructions. The XFmode
17671 compare instructions require registers except when comparing
17672 against zero or when converting operand 1 from fixed point to floating point.  */
17676 && (fpcmp_mode == CCFPUmode
17677 || (op_mode == XFmode
17678 && ! (standard_80387_constant_p (op0) == 1
17679 || standard_80387_constant_p (op1) == 1)
17680 && GET_CODE (op1) != FLOAT)
17681 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17683 op0 = force_reg (op_mode, op0);
17684 op1 = force_reg (op_mode, op1);
17688 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17689 things around if they appear profitable, otherwise force op0
17690 into a register. */
17692 if (standard_80387_constant_p (op0) == 0
17694 && ! (standard_80387_constant_p (op1) == 0
17697 enum rtx_code new_code = ix86_fp_swap_condition (code);
17698 if (new_code != UNKNOWN)
17701 tmp = op0, op0 = op1, op1 = tmp;
17707 op0 = force_reg (op_mode, op0);
17709 if (CONSTANT_P (op1))
17711 int tmp = standard_80387_constant_p (op1);
17713 op1 = validize_mem (force_const_mem (op_mode, op1));
17717 op1 = force_reg (op_mode, op1);
17720 op1 = force_reg (op_mode, op1);
17724 /* Try to rearrange the comparison to make it cheaper. */
17725 if (ix86_fp_comparison_cost (code)
17726 > ix86_fp_comparison_cost (swap_condition (code))
17727 && (REG_P (op1) || can_create_pseudo_p ()))
17730 tmp = op0, op0 = op1, op1 = tmp;
17731 code = swap_condition (code);
17733 op0 = force_reg (op_mode, op0);
17741 /* Convert comparison codes we use to represent FP comparison to integer
17742 code that will result in a proper branch.  Return UNKNOWN if no such code is available.  */
17746 ix86_fp_compare_code_to_integer (enum rtx_code code)
17775 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17778 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17780 enum machine_mode fpcmp_mode, intcmp_mode;
17783 fpcmp_mode = ix86_fp_compare_mode (code);
17784 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17786 /* Do fcomi/sahf based test when profitable. */
17787 switch (ix86_fp_comparison_strategy (code))
17789 case IX86_FPCMP_COMI:
17790 intcmp_mode = fpcmp_mode;
17791 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17792 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17797 case IX86_FPCMP_SAHF:
17798 intcmp_mode = fpcmp_mode;
17799 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17800 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17804 scratch = gen_reg_rtx (HImode);
17805 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17806 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17809 case IX86_FPCMP_ARITH:
17810 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17811 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17812 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17814 scratch = gen_reg_rtx (HImode);
17815 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17817 /* In the unordered case, we have to check C2 for NaN's, which
17818 doesn't happen to work out to anything nice combination-wise.
17819 So do some bit twiddling on the value we've got in AH to come
17820 up with an appropriate set of condition codes. */
17822 intcmp_mode = CCNOmode;
17827 if (code == GT || !TARGET_IEEE_FP)
17829 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17834 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17835 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17836 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17837 intcmp_mode = CCmode;
17843 if (code == LT && TARGET_IEEE_FP)
17845 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17846 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17847 intcmp_mode = CCmode;
17852 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17858 if (code == GE || !TARGET_IEEE_FP)
17860 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17865 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17866 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17872 if (code == LE && TARGET_IEEE_FP)
17874 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17875 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17876 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17877 intcmp_mode = CCmode;
17882 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17888 if (code == EQ && TARGET_IEEE_FP)
17890 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17891 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17892 intcmp_mode = CCmode;
17897 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17903 if (code == NE && TARGET_IEEE_FP)
17905 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17906 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17912 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17918 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17922 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17927 gcc_unreachable ();
17935 /* Return the test that should be put into the flags user, i.e.
17936 the bcc, scc, or cmov instruction. */
17937 return gen_rtx_fmt_ee (code, VOIDmode,
17938 gen_rtx_REG (intcmp_mode, FLAGS_REG),
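/* Added sketch of the IX86_FPCMP_ARITH path for "a == b" under
   TARGET_IEEE_FP (a rough picture; the exact insns come from the
   patterns used above):

       fucomp                  ; non-trapping x87 compare
       fnstsw  %ax             ; FP status word into %ax
       andb    $0x45, %ah      ; keep C0, C2 and C3
       cmpb    $0x40, %ah      ; equal-and-ordered iff only C3 is set

   followed by a CCmode EQ test of the integer flags.  */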
17943 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17947 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17948 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17950 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17952 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17953 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17956 ret = ix86_expand_int_compare (code, op0, op1);
17962 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17964 enum machine_mode mode = GET_MODE (op0);
17976 tmp = ix86_expand_compare (code, op0, op1);
17977 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17978 gen_rtx_LABEL_REF (VOIDmode, label),
17980 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17987 /* Expand DImode branch into multiple compare+branch. */
17989 rtx lo[2], hi[2], label2;
17990 enum rtx_code code1, code2, code3;
17991 enum machine_mode submode;
17993 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17995 tmp = op0, op0 = op1, op1 = tmp;
17996 code = swap_condition (code);
17999 split_double_mode (mode, &op0, 1, lo+0, hi+0);
18000 split_double_mode (mode, &op1, 1, lo+1, hi+1);
18002 submode = mode == DImode ? SImode : DImode;
18004 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18005 avoid two branches. This costs one extra insn, so disable when
18006 optimizing for size. */
18008 if ((code == EQ || code == NE)
18009 && (!optimize_insn_for_size_p ()
18010 || hi[1] == const0_rtx || lo[1] == const0_rtx))
18015 if (hi[1] != const0_rtx)
18016 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
18017 NULL_RTX, 0, OPTAB_WIDEN);
18020 if (lo[1] != const0_rtx)
18021 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
18022 NULL_RTX, 0, OPTAB_WIDEN);
18024 tmp = expand_binop (submode, ior_optab, xor1, xor0,
18025 NULL_RTX, 0, OPTAB_WIDEN);
18027 ix86_expand_branch (code, tmp, const0_rtx, label);
18031 /* Otherwise, if we are doing a less-than or greater-or-equal-than
18032 comparison, op1 is a constant, and the low word of op1 is zero, then
18033 we can just examine the high word.  Similarly for a low word of -1
18034 with less-or-equal-than or greater-than.  */
18036 if (CONST_INT_P (hi[1]))
18039 case LT: case LTU: case GE: case GEU:
18040 if (lo[1] == const0_rtx)
18042 ix86_expand_branch (code, hi[0], hi[1], label);
18046 case LE: case LEU: case GT: case GTU:
18047 if (lo[1] == constm1_rtx)
18049 ix86_expand_branch (code, hi[0], hi[1], label);
18057 /* Otherwise, we need two or three jumps. */
18059 label2 = gen_label_rtx ();
18062 code2 = swap_condition (code);
18063 code3 = unsigned_condition (code);
18067 case LT: case GT: case LTU: case GTU:
18070 case LE: code1 = LT; code2 = GT; break;
18071 case GE: code1 = GT; code2 = LT; break;
18072 case LEU: code1 = LTU; code2 = GTU; break;
18073 case GEU: code1 = GTU; code2 = LTU; break;
18075 case EQ: code1 = UNKNOWN; code2 = NE; break;
18076 case NE: code2 = UNKNOWN; break;
18079 gcc_unreachable ();
18084 * if (hi(a) < hi(b)) goto true;
18085 * if (hi(a) > hi(b)) goto false;
18086 * if (lo(a) < lo(b)) goto true;
18090 if (code1 != UNKNOWN)
18091 ix86_expand_branch (code1, hi[0], hi[1], label);
18092 if (code2 != UNKNOWN)
18093 ix86_expand_branch (code2, hi[0], hi[1], label2);
18095 ix86_expand_branch (code3, lo[0], lo[1], label);
18097 if (code2 != UNKNOWN)
18098 emit_label (label2);
18103 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
18108 /* Split branch based on floating point condition. */
18110 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18111 rtx target1, rtx target2, rtx tmp, rtx pushed)
18116 if (target2 != pc_rtx)
18119 code = reverse_condition_maybe_unordered (code);
18124 condition = ix86_expand_fp_compare (code, op1, op2,
18127 /* Remove pushed operand from stack. */
18129 ix86_free_from_memory (GET_MODE (pushed));
18131 i = emit_jump_insn (gen_rtx_SET
18133 gen_rtx_IF_THEN_ELSE (VOIDmode,
18134 condition, target1, target2)));
18135 if (split_branch_probability >= 0)
18136 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
18140 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18144 gcc_assert (GET_MODE (dest) == QImode);
18146 ret = ix86_expand_compare (code, op0, op1);
18147 PUT_MODE (ret, QImode);
18148 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
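/* Added usage example: for "flag = (a < b);" this is reached with a
   QImode DEST and produces roughly

       cmpl  %esi, %edi
       setl  %al

   with any required zero extension emitted separately.  */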
18151 /* Expand a comparison setting or clearing the carry flag.  Return true when
18152 successful and set POP for the operation.  */
18154 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18156 enum machine_mode mode =
18157 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18159 /* Do not handle double-mode compares that go through a special path.  */
18160 if (mode == (TARGET_64BIT ? TImode : DImode))
18163 if (SCALAR_FLOAT_MODE_P (mode))
18165 rtx compare_op, compare_seq;
18167 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18169 /* Shortcut: the following common codes never translate
18170 into carry flag compares.  */
18171 if (code == EQ || code == NE || code == UNEQ || code == LTGT
18172 || code == ORDERED || code == UNORDERED)
18175 /* These comparisons require the zero flag; swap operands so they won't.  */
18176 if ((code == GT || code == UNLE || code == LE || code == UNGT)
18177 && !TARGET_IEEE_FP)
18182 code = swap_condition (code);
18185 /* Try to expand the comparison and verify that we end up with
18186 a carry-flag-based comparison.  This fails to be true only when
18187 we decide to expand the comparison using arithmetic, which is
18188 not a common scenario.  */
18190 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18191 compare_seq = get_insns ();
18194 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18195 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18196 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18198 code = GET_CODE (compare_op);
18200 if (code != LTU && code != GEU)
18203 emit_insn (compare_seq);
18208 if (!INTEGRAL_MODE_P (mode))
18217 /* Convert a==0 into (unsigned)a<1. */
18220 if (op1 != const0_rtx)
18223 code = (code == EQ ? LTU : GEU);
18226 /* Convert a>b into b<a or a>=b+1.  */
18229 if (CONST_INT_P (op1))
18231 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18232 /* Bail out on overflow.  We still can swap operands, but that
18233 would force loading of the constant into a register.  */
18234 if (op1 == const0_rtx
18235 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18237 code = (code == GTU ? GEU : LTU);
18244 code = (code == GTU ? LTU : GEU);
18248 /* Convert a>=0 into (unsigned)a<0x80000000. */
18251 if (mode == DImode || op1 != const0_rtx)
18253 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18254 code = (code == LT ? GEU : LTU);
18258 if (mode == DImode || op1 != constm1_rtx)
18260 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18261 code = (code == LE ? GEU : LTU);
18267 /* Swapping operands may cause a constant to appear as the first operand.  */
18268 if (!nonimmediate_operand (op0, VOIDmode))
18270 if (!can_create_pseudo_p ())
18272 op0 = force_reg (mode, op0);
18274 *pop = ix86_expand_compare (code, op0, op1);
18275 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
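/* Added example: the GTU case above rewrites unsigned "a > 41" as
   "a >= 42" (GEU), a pure carry-flag test, so a later sbb/adc can
   consume CF directly without needing a setcc.  */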
18280 ix86_expand_int_movcc (rtx operands[])
18282 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18283 rtx compare_seq, compare_op;
18284 enum machine_mode mode = GET_MODE (operands[0]);
18285 bool sign_bit_compare_p = false;
18286 rtx op0 = XEXP (operands[1], 0);
18287 rtx op1 = XEXP (operands[1], 1);
18290 compare_op = ix86_expand_compare (code, op0, op1);
18291 compare_seq = get_insns ();
18294 compare_code = GET_CODE (compare_op);
18296 if ((op1 == const0_rtx && (code == GE || code == LT))
18297 || (op1 == constm1_rtx && (code == GT || code == LE)))
18298 sign_bit_compare_p = true;
18300 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18301 HImode insns, we'd be swallowed in word prefix ops. */
18303 if ((mode != HImode || TARGET_FAST_PREFIX)
18304 && (mode != (TARGET_64BIT ? TImode : DImode))
18305 && CONST_INT_P (operands[2])
18306 && CONST_INT_P (operands[3]))
18308 rtx out = operands[0];
18309 HOST_WIDE_INT ct = INTVAL (operands[2]);
18310 HOST_WIDE_INT cf = INTVAL (operands[3]);
18311 HOST_WIDE_INT diff;
18314 /* Sign bit compares are better done using shifts than by using an sbb insn.  */
18316 if (sign_bit_compare_p
18317 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18319 /* Detect overlap between destination and compare sources. */
18322 if (!sign_bit_compare_p)
18325 bool fpcmp = false;
18327 compare_code = GET_CODE (compare_op);
18329 flags = XEXP (compare_op, 0);
18331 if (GET_MODE (flags) == CCFPmode
18332 || GET_MODE (flags) == CCFPUmode)
18336 = ix86_fp_compare_code_to_integer (compare_code);
18339 /* To simplify the rest of the code, restrict to the GEU case.  */
18340 if (compare_code == LTU)
18342 HOST_WIDE_INT tmp = ct;
18345 compare_code = reverse_condition (compare_code);
18346 code = reverse_condition (code);
18351 PUT_CODE (compare_op,
18352 reverse_condition_maybe_unordered
18353 (GET_CODE (compare_op)));
18355 PUT_CODE (compare_op,
18356 reverse_condition (GET_CODE (compare_op)));
18360 if (reg_overlap_mentioned_p (out, op0)
18361 || reg_overlap_mentioned_p (out, op1))
18362 tmp = gen_reg_rtx (mode);
18364 if (mode == DImode)
18365 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18367 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18368 flags, compare_op));
18372 if (code == GT || code == GE)
18373 code = reverse_condition (code);
18376 HOST_WIDE_INT tmp = ct;
18381 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18394 tmp = expand_simple_binop (mode, PLUS,
18396 copy_rtx (tmp), 1, OPTAB_DIRECT);
18407 tmp = expand_simple_binop (mode, IOR,
18409 copy_rtx (tmp), 1, OPTAB_DIRECT);
18411 else if (diff == -1 && ct)
18421 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18423 tmp = expand_simple_binop (mode, PLUS,
18424 copy_rtx (tmp), GEN_INT (cf),
18425 copy_rtx (tmp), 1, OPTAB_DIRECT);
18433 /* cmpl op0,op1 ; sbbl dest,dest ; [notl dest] ; andl cf - ct, dest ; [addl ct, dest].  Size 8 - 11.  */
18443 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18446 tmp = expand_simple_binop (mode, AND,
18448 gen_int_mode (cf - ct, mode),
18449 copy_rtx (tmp), 1, OPTAB_DIRECT);
18451 tmp = expand_simple_binop (mode, PLUS,
18452 copy_rtx (tmp), GEN_INT (ct),
18453 copy_rtx (tmp), 1, OPTAB_DIRECT);
18456 if (!rtx_equal_p (tmp, out))
18457 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
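/* Added worked example (a sketch, registers chosen arbitrarily):
   for unsigned "dest = (a < b) ? 5 : 12" this path can produce

       cmpl  %ebx, %eax        ; CF = (a < b)
       sbbl  %edx, %edx        ; dest = (a < b) ? -1 : 0
       andl  $-7, %edx         ; 5 - 12 = -7
       addl  $12, %edx         ; dest = (a < b) ? 5 : 12

   with the notl variant used when the constants come out reversed.  */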
18464 enum machine_mode cmp_mode = GET_MODE (op0);
18467 tmp = ct, ct = cf, cf = tmp;
18470 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18472 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18474 /* We may be reversing an unordered compare to a normal compare, which
18475 is not valid in general (we may convert a non-trapping condition
18476 to a trapping one); however, on i386 we currently emit all
18477 comparisons unordered.  */
18478 compare_code = reverse_condition_maybe_unordered (compare_code);
18479 code = reverse_condition_maybe_unordered (code);
18483 compare_code = reverse_condition (compare_code);
18484 code = reverse_condition (code);
18488 compare_code = UNKNOWN;
18489 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18490 && CONST_INT_P (op1))
18492 if (op1 == const0_rtx
18493 && (code == LT || code == GE))
18494 compare_code = code;
18495 else if (op1 == constm1_rtx)
18499 else if (code == GT)
18504 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18505 if (compare_code != UNKNOWN
18506 && GET_MODE (op0) == GET_MODE (out)
18507 && (cf == -1 || ct == -1))
18509 /* If the lea code below could be used, only optimize
18510 if it results in a 2-insn sequence.  */
18512 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18513 || diff == 3 || diff == 5 || diff == 9)
18514 || (compare_code == LT && ct == -1)
18515 || (compare_code == GE && cf == -1))
18518 /* notl op1 (if necessary) ; sarl $31, dest ; orl cf, dest.  */
18526 code = reverse_condition (code);
18529 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18531 out = expand_simple_binop (mode, IOR,
18533 out, 1, OPTAB_DIRECT);
18534 if (out != operands[0])
18535 emit_move_insn (operands[0], out);
18542 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18543 || diff == 3 || diff == 5 || diff == 9)
18544 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18546 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18552 * xorl dest,dest ; cmpl op1,op2 ; setcc dest ; lea cf(dest*(ct-cf)),dest.  Size 14.
18556 * This also catches the degenerate setcc-only case.
18562 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18565 /* On x86_64 the lea instruction operates on Pmode, so we need
18566 to get the arithmetic done in the proper mode to match.  */
18568 tmp = copy_rtx (out);
18572 out1 = copy_rtx (out);
18573 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18577 tmp = gen_rtx_PLUS (mode, tmp, out1);
18583 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18586 if (!rtx_equal_p (tmp, out))
18589 out = force_operand (tmp, copy_rtx (out));
18591 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18593 if (!rtx_equal_p (out, operands[0]))
18594 emit_move_insn (operands[0], copy_rtx (out));
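/* Added example: with diff == ct - cf == 4, the setcc+lea path gives
   for unsigned "dest = (a < b) ? 7 : 3":

       xorl  %eax, %eax
       cmpl  %ebx, %ecx
       setb  %al               ; %eax = (a < b)
       leal  3(,%eax,4), %eax  ; 3 + 4 * flag

   which is branch-free and needs neither sbb nor cmov.  */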
18600 * General case:                  Jumpful:
18601 *   xorl dest,dest                  cmpl op1, op2
18602 *   cmpl op1, op2                   movl ct, dest
18603 *   setcc dest                      jcc 1f
18604 *   decl dest                       movl cf, dest
18605 *   andl (cf-ct),dest             1:
*   addl ct,dest
18608 * Size 20.                        Size 14.
18610 * This is reasonably steep, but branch mispredict costs are
18611 * high on modern cpus, so consider failing only if optimizing for size.  */
18615 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18616 && BRANCH_COST (optimize_insn_for_speed_p (),
18621 enum machine_mode cmp_mode = GET_MODE (op0);
18626 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18628 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18630 /* We may be reversing unordered compare to normal compare,
18631 that is not valid in general (we may convert non-trapping
18632 condition to trapping one), however on i386 we currently
18633 emit all comparisons unordered. */
18634 code = reverse_condition_maybe_unordered (code);
18638 code = reverse_condition (code);
18639 if (compare_code != UNKNOWN)
18640 compare_code = reverse_condition (compare_code);
18644 if (compare_code != UNKNOWN)
18646 /* notl op1 (if needed) ; sarl $31, dest ; andl (cf-ct), dest ; addl ct, dest.
18651 For x < 0 (resp. x <= -1) there will be no notl,
18652 so if possible swap the constants to get rid of the complement.
18654 True/false will be -1/0 while the code below (store flag
18655 followed by decrement) is 0/-1, so the constants need
18656 to be exchanged once more.  */
18658 if (compare_code == GE || !cf)
18660 code = reverse_condition (code);
18665 HOST_WIDE_INT tmp = cf;
18670 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18674 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18676 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18678 copy_rtx (out), 1, OPTAB_DIRECT);
18681 out = expand_simple_binop (mode, AND, copy_rtx (out),
18682 gen_int_mode (cf - ct, mode),
18683 copy_rtx (out), 1, OPTAB_DIRECT);
18685 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18686 copy_rtx (out), 1, OPTAB_DIRECT);
18687 if (!rtx_equal_p (out, operands[0]))
18688 emit_move_insn (operands[0], copy_rtx (out));
18694 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18696 /* Try a few more things with specific constants and a variable.  */
18699 rtx var, orig_out, out, tmp;
18701 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18704 /* If one of the two operands is an interesting constant, load a
18705 constant with the above and mask it in with a logical operation. */
18707 if (CONST_INT_P (operands[2]))
18710 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18711 operands[3] = constm1_rtx, op = and_optab;
18712 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18713 operands[3] = const0_rtx, op = ior_optab;
18717 else if (CONST_INT_P (operands[3]))
18720 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18721 operands[2] = constm1_rtx, op = and_optab;
18722 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18723 operands[2] = const0_rtx, op = ior_optab;
18730 orig_out = operands[0];
18731 tmp = gen_reg_rtx (mode);
18734 /* Recurse to get the constant loaded. */
18735 if (ix86_expand_int_movcc (operands) == 0)
18738 /* Mask in the interesting variable. */
18739 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18741 if (!rtx_equal_p (out, orig_out))
18742 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
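/* Added example: "dest = cond ? var : 0" first recurses to build
   "tmp = cond ? -1 : 0" and then masks the variable in:

       sbbl  %edx, %edx        ; tmp = cond ? -1 : 0  (via recursion)
       andl  %esi, %edx        ; dest = cond ? var : 0

   the ior_optab case handles "dest = cond ? -1 : var" the same way.  */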
18748 /* For comparison with the above: movl cf,dest ; movl ct,tmp ; cmpl op1,op2 ; cmovcc tmp,dest.  Size 15.  */
18758 if (! nonimmediate_operand (operands[2], mode))
18759 operands[2] = force_reg (mode, operands[2]);
18760 if (! nonimmediate_operand (operands[3], mode))
18761 operands[3] = force_reg (mode, operands[3]);
18763 if (! register_operand (operands[2], VOIDmode)
18765 || ! register_operand (operands[3], VOIDmode)))
18766 operands[2] = force_reg (mode, operands[2]);
18769 && ! register_operand (operands[3], VOIDmode))
18770 operands[3] = force_reg (mode, operands[3]);
18772 emit_insn (compare_seq);
18773 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18774 gen_rtx_IF_THEN_ELSE (mode,
18775 compare_op, operands[2],
18780 /* Swap, force into registers, or otherwise massage the two operands
18781 to an sse comparison with a mask result. Thus we differ a bit from
18782 ix86_prepare_fp_compare_args which expects to produce a flags result.
18784 The DEST operand exists to help determine whether to commute commutative
18785 operators. The POP0/POP1 operands are updated in place. The new
18786 comparison code is returned, or UNKNOWN if not implementable. */
18788 static enum rtx_code
18789 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18790 rtx *pop0, rtx *pop1)
18798 /* AVX supports all the needed comparisons. */
18801 /* We have no LTGT as an operator. We could implement it with
18802 NE & ORDERED, but this requires an extra temporary. It's
18803 not clear that it's worth it. */
18810 /* These are supported directly. */
18817 /* AVX has 3 operand comparisons, no need to swap anything. */
18820 /* For commutative operators, try to canonicalize the destination
18821 operand to be first in the comparison - this helps reload to
18822 avoid extra moves. */
18823 if (!dest || !rtx_equal_p (dest, *pop1))
18831 /* These are not supported directly before AVX, and furthermore
18832 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
18833 comparison operands to transform into something that is supported.  */
18838 code = swap_condition (code);
18842 gcc_unreachable ();
18848 /* Detect conditional moves that exactly match min/max operational
18849 semantics. Note that this is IEEE safe, as long as we don't
18850 interchange the operands.
18852 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18853 and TRUE if the operation is successful and instructions are emitted. */
18856 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18857 rtx cmp_op1, rtx if_true, rtx if_false)
18859 enum machine_mode mode;
18865 else if (code == UNGE)
18868 if_true = if_false;
18874 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18876 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18881 mode = GET_MODE (dest);
18883 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18884 but MODE may be a vector mode and thus not appropriate. */
18885 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18887 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18890 if_true = force_reg (mode, if_true);
18891 v = gen_rtvec (2, if_true, if_false);
18892 tmp = gen_rtx_UNSPEC (mode, v, u);
18896 code = is_min ? SMIN : SMAX;
18897 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18900 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
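/* Added example: with both flag_finite_math_only and
   flag_unsafe_math_optimizations set, "x < y ? x : y" in SFmode
   matches the SMIN path above and becomes a single

       minss  %xmm1, %xmm0

   while the IEEE-safe case goes through UNSPEC_IEEE_MIN so that the
   operand order (NaN and -0.0 behaviour) is preserved.  */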
18904 /* Expand an sse vector comparison. Return the register with the result. */
18907 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18908 rtx op_true, rtx op_false)
18910 enum machine_mode mode = GET_MODE (dest);
18911 enum machine_mode cmp_mode = GET_MODE (cmp_op0);
18914 cmp_op0 = force_reg (cmp_mode, cmp_op0);
18915 if (!nonimmediate_operand (cmp_op1, cmp_mode))
18916 cmp_op1 = force_reg (cmp_mode, cmp_op1);
18919 || reg_overlap_mentioned_p (dest, op_true)
18920 || reg_overlap_mentioned_p (dest, op_false))
18921 dest = gen_reg_rtx (mode);
18923 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
18924 if (cmp_mode != mode)
18926 x = force_reg (cmp_mode, x);
18927 convert_move (dest, x, false);
18930 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18935 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18936 operations. This is used for both scalar and vector conditional moves. */
18939 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18941 enum machine_mode mode = GET_MODE (dest);
18944 if (vector_all_ones_operand (op_true, GET_MODE (op_true))
18945 && rtx_equal_p (op_false, CONST0_RTX (mode)))
18947 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
18949 else if (op_false == CONST0_RTX (mode))
18951 op_true = force_reg (mode, op_true);
18952 x = gen_rtx_AND (mode, cmp, op_true);
18953 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18955 else if (op_true == CONST0_RTX (mode))
18957 op_false = force_reg (mode, op_false);
18958 x = gen_rtx_NOT (mode, cmp);
18959 x = gen_rtx_AND (mode, x, op_false);
18960 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18962 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
18964 op_false = force_reg (mode, op_false);
18965 x = gen_rtx_IOR (mode, cmp, op_false);
18966 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18968 else if (TARGET_XOP)
18970 op_true = force_reg (mode, op_true);
18972 if (!nonimmediate_operand (op_false, mode))
18973 op_false = force_reg (mode, op_false);
18975 emit_insn (gen_rtx_SET (mode, dest,
18976 gen_rtx_IF_THEN_ELSE (mode, cmp,
18982 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
18984 if (!nonimmediate_operand (op_true, mode))
18985 op_true = force_reg (mode, op_true);
18987 op_false = force_reg (mode, op_false);
18993 gen = gen_sse4_1_blendvps;
18997 gen = gen_sse4_1_blendvpd;
19005 gen = gen_sse4_1_pblendvb;
19006 dest = gen_lowpart (V16QImode, dest);
19007 op_false = gen_lowpart (V16QImode, op_false);
19008 op_true = gen_lowpart (V16QImode, op_true);
19009 cmp = gen_lowpart (V16QImode, cmp);
19014 gen = gen_avx_blendvps256;
19018 gen = gen_avx_blendvpd256;
19026 gen = gen_avx2_pblendvb;
19027 dest = gen_lowpart (V32QImode, dest);
19028 op_false = gen_lowpart (V32QImode, op_false);
19029 op_true = gen_lowpart (V32QImode, op_true);
19030 cmp = gen_lowpart (V32QImode, cmp);
19038 emit_insn (gen (dest, op_false, op_true, cmp));
19041 op_true = force_reg (mode, op_true);
19043 t2 = gen_reg_rtx (mode);
19045 t3 = gen_reg_rtx (mode);
19049 x = gen_rtx_AND (mode, op_true, cmp);
19050 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
19052 x = gen_rtx_NOT (mode, cmp);
19053 x = gen_rtx_AND (mode, x, op_false);
19054 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
19056 x = gen_rtx_IOR (mode, t3, t2);
19057 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
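/* Added sketch of the three-insn fallback built just above:

       t2   = cmp & op_true         ; andps
       t3   = ~cmp & op_false       ; andnps
       dest = t3 | t2               ; orps

   which is what targets without blendv/pcmov end up using.  */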
19062 /* Expand a floating-point conditional move. Return true if successful. */
19065 ix86_expand_fp_movcc (rtx operands[])
19067 enum machine_mode mode = GET_MODE (operands[0]);
19068 enum rtx_code code = GET_CODE (operands[1]);
19069 rtx tmp, compare_op;
19070 rtx op0 = XEXP (operands[1], 0);
19071 rtx op1 = XEXP (operands[1], 1);
19073 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
19075 enum machine_mode cmode;
19077 /* Since we've no cmove for sse registers, don't force bad register
19078 allocation just to gain access to it. Deny movcc when the
19079 comparison mode doesn't match the move mode. */
19080 cmode = GET_MODE (op0);
19081 if (cmode == VOIDmode)
19082 cmode = GET_MODE (op1);
19086 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
19087 if (code == UNKNOWN)
19090 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
19091 operands[2], operands[3]))
19094 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
19095 operands[2], operands[3]);
19096 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
19100 /* The floating point conditional move instructions don't directly
19101 support conditions resulting from a signed integer comparison. */
19103 compare_op = ix86_expand_compare (code, op0, op1);
19104 if (!fcmov_comparison_operator (compare_op, VOIDmode))
19106 tmp = gen_reg_rtx (QImode);
19107 ix86_expand_setcc (tmp, code, op0, op1);
19109 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
19112 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19113 gen_rtx_IF_THEN_ELSE (mode, compare_op,
19114 operands[2], operands[3])));
19119 /* Expand a floating-point vector conditional move; a vcond operation
19120 rather than a movcc operation. */
19123 ix86_expand_fp_vcond (rtx operands[])
19125 enum rtx_code code = GET_CODE (operands[3]);
19128 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
19129 &operands[4], &operands[5]);
19130 if (code == UNKNOWN)
19133 switch (GET_CODE (operands[3]))
19136 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
19137 operands[5], operands[0], operands[0]);
19138 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
19139 operands[5], operands[1], operands[2]);
19143 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
19144 operands[5], operands[0], operands[0]);
19145 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
19146 operands[5], operands[1], operands[2]);
19150 gcc_unreachable ();
19152 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
19154 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19158 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
19159 operands[5], operands[1], operands[2]))
19162 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
19163 operands[1], operands[2]);
19164 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19168 /* Expand a signed/unsigned integral vector conditional move. */
19171 ix86_expand_int_vcond (rtx operands[])
19173 enum machine_mode mode = GET_MODE (operands[0]);
19174 enum rtx_code code = GET_CODE (operands[3]);
19175 bool negate = false;
19178 cop0 = operands[4];
19179 cop1 = operands[5];
19181 /* XOP supports all of the comparisons on all vector int types. */
19184 /* Canonicalize the comparison to EQ, GT, GTU. */
19195 code = reverse_condition (code);
19201 code = reverse_condition (code);
19207 code = swap_condition (code);
19208 x = cop0, cop0 = cop1, cop1 = x;
19212 gcc_unreachable ();
19215 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19216 if (mode == V2DImode)
19221 /* SSE4.1 supports EQ. */
19222 if (!TARGET_SSE4_1)
19228 /* SSE4.2 supports GT/GTU. */
19229 if (!TARGET_SSE4_2)
19234 gcc_unreachable ();
19238 /* Unsigned parallel compare is not supported by the hardware.
19239 Play some tricks to turn this into a signed comparison against 0.  */
19243 cop0 = force_reg (mode, cop0);
19253 rtx (*gen_sub3) (rtx, rtx, rtx);
19257 case V8SImode: gen_sub3 = gen_subv8si3; break;
19258 case V4DImode: gen_sub3 = gen_subv4di3; break;
19259 case V4SImode: gen_sub3 = gen_subv4si3; break;
19260 case V2DImode: gen_sub3 = gen_subv2di3; break;
19262 gcc_unreachable ();
19264 /* Subtract (-(INT MAX) - 1) from both operands to make them signed.  */
19266 mask = ix86_build_signbit_mask (mode, true, false);
19267 t1 = gen_reg_rtx (mode);
19268 emit_insn (gen_sub3 (t1, cop0, mask));
19270 t2 = gen_reg_rtx (mode);
19271 emit_insn (gen_sub3 (t2, cop1, mask));
19283 /* Perform a parallel unsigned saturating subtraction. */
19284 x = gen_reg_rtx (mode);
19285 emit_insn (gen_rtx_SET (VOIDmode, x,
19286 gen_rtx_US_MINUS (mode, cop0, cop1)));
19289 cop1 = CONST0_RTX (mode);
19295 gcc_unreachable ();
19300 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19301 operands[1+negate], operands[2-negate]);
19303 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19304 operands[2-negate]);
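/* Added example: unsigned "a > b" on V4SI without XOP flips the sign
   bit of both operands and then uses the signed compare:

       psubd   SIGNMASK, %xmm0     ; a - 0x80000000 per element
       psubd   SIGNMASK, %xmm1
       pcmpgtd %xmm1, %xmm0        ; now an ordinary signed compare

   where SIGNMASK stands for the ix86_build_signbit_mask constant.  */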
19309 ix86_expand_vshuffle (rtx operands[])
19311 rtx target = operands[0];
19312 rtx op0 = operands[1];
19313 rtx op1 = operands[2];
19314 rtx mask = operands[3];
19316 enum machine_mode mode = GET_MODE (op0);
19317 enum machine_mode maskmode = GET_MODE (mask);
19319 bool one_operand_shuffle = rtx_equal_p (op0, op1);
19321 gcc_checking_assert (GET_MODE_BITSIZE (mode) == 128);
19323 /* Number of elements in the vector. */
19324 w = GET_MODE_NUNITS (mode);
19325 e = GET_MODE_UNIT_SIZE (mode);
19329 /* The XOP VPPERM insn supports three inputs. By ignoring the
19330 one_operand_shuffle special case, we avoid creating another
19331 set of constant vectors in memory. */
19332 one_operand_shuffle = false;
19334 /* mask = mask & {2*w-1, ...} */
19335 vt = GEN_INT (2*w - 1);
19339 /* mask = mask & {w-1, ...} */
19340 vt = GEN_INT (w - 1);
19343 for (i = 0; i < w; i++)
19345 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19346 mask = expand_simple_binop (maskmode, AND, mask, vt,
19347 NULL_RTX, 0, OPTAB_DIRECT);
19349 /* For non-QImode operations, convert the word permutation control
19350 into a byte permutation control. */
19351 if (mode != V16QImode)
19353 mask = expand_simple_binop (maskmode, ASHIFT, mask,
19354 GEN_INT (exact_log2 (e)),
19355 NULL_RTX, 0, OPTAB_DIRECT);
19357 /* Convert mask to vector of chars. */
19358 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
19360 /* Replicate each of the input bytes into byte positions:
19361 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
19362 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
19363 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
19364 for (i = 0; i < 16; ++i)
19365 vec[i] = GEN_INT (i/e * e);
19366 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
19367 vt = force_const_mem (V16QImode, vt);
19369 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
19371 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
19373 /* Convert it into the byte positions by doing
19374 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
19375 for (i = 0; i < 16; ++i)
19376 vec[i] = GEN_INT (i % e);
19377 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
19378 vt = force_const_mem (V16QImode, vt);
19379 emit_insn (gen_addv16qi3 (mask, mask, vt));
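/* Added worked example: for a V4SI one-operand shuffle with control
   {3,0,2,1}, the scaling by e == 4 gives {12,0,8,4}, the replication
   step expands that to {12,12,12,12, 0,0,0,0, 8,8,8,8, 4,4,4,4}, and
   adding {0,1,2,3, 0,1,2,3, ...} yields the final pshufb control
   {12,13,14,15, 0,1,2,3, 8,9,10,11, 4,5,6,7}.  */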
19382 /* The actual shuffle operations all operate on V16QImode. */
19383 op0 = gen_lowpart (V16QImode, op0);
19384 op1 = gen_lowpart (V16QImode, op1);
19385 target = gen_lowpart (V16QImode, target);
19389 emit_insn (gen_xop_pperm (target, op0, op1, mask));
19391 else if (one_operand_shuffle)
19393 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
19397 rtx xops[6], t1, t2;
19400 /* Shuffle the two input vectors independently. */
19401 t1 = gen_reg_rtx (V16QImode);
19402 t2 = gen_reg_rtx (V16QImode);
19403 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
19404 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
19406 /* Then merge them together. The key is whether any given control
19407 element contained a bit set that indicates the second word. */
19408 mask = operands[3];
19410 if (maskmode == V2DImode && !TARGET_SSE4_1)
19412 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
19413 more shuffle to convert the V2DI input mask into a V4SI
19414 input mask, at which point the masking done by ix86_expand_int_vcond
19415 will work as desired.  */
19416 rtx t3 = gen_reg_rtx (V4SImode);
19417 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
19418 const0_rtx, const0_rtx,
19419 const2_rtx, const2_rtx));
19421 maskmode = V4SImode;
19425 for (i = 0; i < w; i++)
19427 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19428 vt = force_reg (maskmode, vt);
19429 mask = expand_simple_binop (maskmode, AND, mask, vt,
19430 NULL_RTX, 0, OPTAB_DIRECT);
19432 xops[0] = gen_lowpart (maskmode, operands[0]);
19433 xops[1] = gen_lowpart (maskmode, t2);
19434 xops[2] = gen_lowpart (maskmode, t1);
19435 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
19438 ok = ix86_expand_int_vcond (xops);
19443 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
19444 true if we should do zero extension, else sign extension. HIGH_P is
19445 true if we want the N/2 high elements, else the low elements. */
19448 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
19450 enum machine_mode imode = GET_MODE (operands[1]);
19455 rtx (*unpack)(rtx, rtx);
19461 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
19463 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
19467 unpack = gen_sse4_1_zero_extendv4hiv4si2;
19469 unpack = gen_sse4_1_sign_extendv4hiv4si2;
19473 unpack = gen_sse4_1_zero_extendv2siv2di2;
19475 unpack = gen_sse4_1_sign_extendv2siv2di2;
19478 gcc_unreachable ();
19483 /* Shift higher 8 bytes to lower 8 bytes. */
19484 tmp = gen_reg_rtx (imode);
19485 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
19486 gen_lowpart (V1TImode, operands[1]),
19492 emit_insn (unpack (operands[0], tmp));
19496 rtx (*unpack)(rtx, rtx, rtx);
19502 unpack = gen_vec_interleave_highv16qi;
19504 unpack = gen_vec_interleave_lowv16qi;
19508 unpack = gen_vec_interleave_highv8hi;
19510 unpack = gen_vec_interleave_lowv8hi;
19514 unpack = gen_vec_interleave_highv4si;
19516 unpack = gen_vec_interleave_lowv4si;
19519 gcc_unreachable ();
19522 dest = gen_lowpart (imode, operands[0]);
19525 tmp = force_reg (imode, CONST0_RTX (imode));
19527 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
19528 operands[1], pc_rtx, pc_rtx);
19530 emit_insn (unpack (dest, operands[1], tmp));
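/* Added sketch of the pre-SSE4.1 signed V8HI -> V4SI low unpack: the
   sign mask comes from the compare against zero, and the interleave
   pairs each element with its sign bits:

       pxor      %xmm1, %xmm1
       pcmpgtw   %xmm0, %xmm1      ; xmm1 = (0 > src) ? -1 : 0
       punpcklwd %xmm1, %xmm0      ; widened low elements

   the unsigned case interleaves with an all-zero register instead.  */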
19534 /* Expand conditional increment or decrement using adc/sbb instructions.
19535 The default case using setcc followed by the conditional move can be
19536 done by generic code.  */
19538 ix86_expand_int_addcc (rtx operands[])
19540 enum rtx_code code = GET_CODE (operands[1]);
19542 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
19544 rtx val = const0_rtx;
19545 bool fpcmp = false;
19546 enum machine_mode mode;
19547 rtx op0 = XEXP (operands[1], 0);
19548 rtx op1 = XEXP (operands[1], 1);
19550 if (operands[3] != const1_rtx
19551 && operands[3] != constm1_rtx)
19553 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19555 code = GET_CODE (compare_op);
19557 flags = XEXP (compare_op, 0);
19559 if (GET_MODE (flags) == CCFPmode
19560 || GET_MODE (flags) == CCFPUmode)
19563 code = ix86_fp_compare_code_to_integer (code);
19570 PUT_CODE (compare_op,
19571 reverse_condition_maybe_unordered
19572 (GET_CODE (compare_op)));
19574 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
19577 mode = GET_MODE (operands[0]);
19579 /* Construct either adc or sbb insn. */
19580 if ((code == LTU) == (operands[3] == constm1_rtx))
19585 insn = gen_subqi3_carry;
19588 insn = gen_subhi3_carry;
19591 insn = gen_subsi3_carry;
19594 insn = gen_subdi3_carry;
19597 gcc_unreachable ();
19605 insn = gen_addqi3_carry;
19608 insn = gen_addhi3_carry;
19611 insn = gen_addsi3_carry;
19614 insn = gen_adddi3_carry;
19617 gcc_unreachable ();
19620 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
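/* Added example: "x += (a < b)" with unsigned operands matches here
   (operands[3] == const1_rtx) and becomes

       cmpl  %ebx, %eax        ; CF = (a < b)
       adcl  $0, %ecx          ; x += CF

   while the decrement form uses sbbl instead.  */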
19626 /* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
19627 but works for floating point parameters and non-offsettable memories.
19628 For pushes, it returns just stack offsets; the values will be saved
19629 in the right order.  At most four parts are generated.  */
19632 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
19637 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
19639 size = (GET_MODE_SIZE (mode) + 4) / 8;
19641 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
19642 gcc_assert (size >= 2 && size <= 4);
19644 /* Optimize constant pool references to immediates.  This is used by fp
19645 moves, which force all constants to memory to allow combining.  */
19646 if (MEM_P (operand) && MEM_READONLY_P (operand))
19648 rtx tmp = maybe_get_pool_constant (operand);
19653 if (MEM_P (operand) && !offsettable_memref_p (operand))
19655 /* The only non-offsettable memories we handle are pushes.  */
19656 int ok = push_operand (operand, VOIDmode);
19660 operand = copy_rtx (operand);
19661 PUT_MODE (operand, Pmode);
19662 parts[0] = parts[1] = parts[2] = parts[3] = operand;
19666 if (GET_CODE (operand) == CONST_VECTOR)
19668 enum machine_mode imode = int_mode_for_mode (mode);
19669 /* Caution: if we looked through a constant pool memory above,
19670 the operand may actually have a different mode now. That's
19671 ok, since we want to pun this all the way back to an integer. */
19672 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
19673 gcc_assert (operand != NULL);
19679 if (mode == DImode)
19680 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19685 if (REG_P (operand))
19687 gcc_assert (reload_completed);
19688 for (i = 0; i < size; i++)
19689 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19691 else if (offsettable_memref_p (operand))
19693 operand = adjust_address (operand, SImode, 0);
19694 parts[0] = operand;
19695 for (i = 1; i < size; i++)
19696 parts[i] = adjust_address (operand, SImode, 4 * i);
19698 else if (GET_CODE (operand) == CONST_DOUBLE)
19703 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19707 real_to_target (l, &r, mode);
19708 parts[3] = gen_int_mode (l[3], SImode);
19709 parts[2] = gen_int_mode (l[2], SImode);
19712 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19713 parts[2] = gen_int_mode (l[2], SImode);
19716 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19719 gcc_unreachable ();
19721 parts[1] = gen_int_mode (l[1], SImode);
19722 parts[0] = gen_int_mode (l[0], SImode);
19725 gcc_unreachable ();
19730 if (mode == TImode)
19731 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19732 if (mode == XFmode || mode == TFmode)
19734 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
19735 if (REG_P (operand))
19737 gcc_assert (reload_completed);
19738 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19739 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19741 else if (offsettable_memref_p (operand))
19743 operand = adjust_address (operand, DImode, 0);
19744 parts[0] = operand;
19745 parts[1] = adjust_address (operand, upper_mode, 8);
19747 else if (GET_CODE (operand) == CONST_DOUBLE)
19752 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19753 real_to_target (l, &r, mode);
19755 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19756 if (HOST_BITS_PER_WIDE_INT >= 64)
19759 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19760 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19763 parts[0] = immed_double_const (l[0], l[1], DImode);
19765 if (upper_mode == SImode)
19766 parts[1] = gen_int_mode (l[2], SImode);
19767 else if (HOST_BITS_PER_WIDE_INT >= 64)
19770 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19771 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19774 parts[1] = immed_double_const (l[2], l[3], DImode);
19777 gcc_unreachable ();
19784 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19785 Return false when normal moves are needed; true when all required
19786 insns have been emitted.  Operands 2-4 contain the input values
19787 in the correct order; operands 5-7 contain the output values.  */
19790 ix86_split_long_move (rtx operands[])
19795 int collisions = 0;
19796 enum machine_mode mode = GET_MODE (operands[0]);
19797 bool collisionparts[4];
19799 /* The DFmode expanders may ask us to move a double.
19800 For a 64-bit target this is a single move.  By hiding the fact
19801 here we simplify the i386.md splitters.  */
19802 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19804 /* Optimize constant pool reference to immediates. This is used by
19805 fp moves, that force all constants to memory to allow combining. */
19807 if (MEM_P (operands[1])
19808 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19809 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19810 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19811 if (push_operand (operands[0], VOIDmode))
19813 operands[0] = copy_rtx (operands[0]);
19814 PUT_MODE (operands[0], Pmode);
19817 operands[0] = gen_lowpart (DImode, operands[0]);
19818 operands[1] = gen_lowpart (DImode, operands[1]);
19819 emit_move_insn (operands[0], operands[1]);
19823 /* The only non-offsettable memory we handle is push. */
19824 if (push_operand (operands[0], VOIDmode))
19827 gcc_assert (!MEM_P (operands[0])
19828 || offsettable_memref_p (operands[0]));
19830 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19831 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19833 /* When emitting a push, take care of source operands on the stack.  */
19834 if (push && MEM_P (operands[1])
19835 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19837 rtx src_base = XEXP (part[1][nparts - 1], 0);
19839 /* Compensate for the stack decrement by 4. */
19840 if (!TARGET_64BIT && nparts == 3
19841 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19842 src_base = plus_constant (src_base, 4);
19844 /* src_base refers to the stack pointer and is
19845 automatically decreased by emitted push. */
19846 for (i = 0; i < nparts; i++)
19847 part[1][i] = change_address (part[1][i],
19848 GET_MODE (part[1][i]), src_base);
19851 /* We need to do the copy in the right order in case an address register
19852 of the source overlaps the destination.  */
19853 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19857 for (i = 0; i < nparts; i++)
19860 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19861 if (collisionparts[i])
19865 /* A collision in the middle part can be handled by reordering.  */
19866 if (collisions == 1 && nparts == 3 && collisionparts [1])
19868 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19869 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19871 else if (collisions == 1
19873 && (collisionparts [1] || collisionparts [2]))
19875 if (collisionparts [1])
19877 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19878 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19882 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19883 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19887 /* If there are more collisions, we can't handle them by reordering.
19888 Do an lea to the last part and use only one colliding move.  */
19889 else if (collisions > 1)
19895 base = part[0][nparts - 1];
19897 /* Handle the case when the last part isn't valid for lea.
19898 This happens in 64-bit mode when storing the 12-byte XFmode.  */
19899 if (GET_MODE (base) != Pmode)
19900 base = gen_rtx_REG (Pmode, REGNO (base));
19902 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19903 part[1][0] = replace_equiv_address (part[1][0], base);
19904 for (i = 1; i < nparts; i++)
19906 tmp = plus_constant (base, UNITS_PER_WORD * i);
19907 part[1][i] = replace_equiv_address (part[1][i], tmp);
19918 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19919 emit_insn (gen_addsi3 (stack_pointer_rtx,
19920 stack_pointer_rtx, GEN_INT (-4)));
19921 emit_move_insn (part[0][2], part[1][2]);
19923 else if (nparts == 4)
19925 emit_move_insn (part[0][3], part[1][3]);
19926 emit_move_insn (part[0][2], part[1][2]);
19931 /* In 64-bit mode we don't have a 32-bit push available.  In case this is a
19932 register, it is OK - we will just use the larger counterpart.  We also
19933 retype memory - these come from an attempt to avoid the REX prefix on
19934 moving the second half of a TFmode value.  */
19935 if (GET_MODE (part[1][1]) == SImode)
19937 switch (GET_CODE (part[1][1]))
19940 part[1][1] = adjust_address (part[1][1], DImode, 0);
19944 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19948 gcc_unreachable ();
19951 if (GET_MODE (part[1][0]) == SImode)
19952 part[1][0] = part[1][1];
19955 emit_move_insn (part[0][1], part[1][1]);
19956 emit_move_insn (part[0][0], part[1][0]);
19960 /* Choose the correct order so as not to overwrite the source before it is copied.  */
19961 if ((REG_P (part[0][0])
19962 && REG_P (part[1][1])
19963 && (REGNO (part[0][0]) == REGNO (part[1][1])
19965 && REGNO (part[0][0]) == REGNO (part[1][2]))
19967 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19969 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19971 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19973 operands[2 + i] = part[0][j];
19974 operands[6 + i] = part[1][j];
19979 for (i = 0; i < nparts; i++)
19981 operands[2 + i] = part[0][i];
19982 operands[6 + i] = part[1][i];
19986 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19987 if (optimize_insn_for_size_p ())
19989 for (j = 0; j < nparts - 1; j++)
19990 if (CONST_INT_P (operands[6 + j])
19991 && operands[6 + j] != const0_rtx
19992 && REG_P (operands[2 + j]))
19993 for (i = j; i < nparts - 1; i++)
19994 if (CONST_INT_P (operands[7 + i])
19995 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19996 operands[7 + i] = operands[2 + j];
19999 for (i = 0; i < nparts; i++)
20000 emit_move_insn (operands[2 + i], operands[6 + i]);
20005 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
20006 left shift by a constant, either using a single shift or
20007 a sequence of add instructions. */
20010 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
20012 rtx (*insn)(rtx, rtx, rtx);
20015 || (count * ix86_cost->add <= ix86_cost->shift_const
20016 && !optimize_insn_for_size_p ()))
20018 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
20019 while (count-- > 0)
20020 emit_insn (insn (operand, operand, operand));
20024 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20025 emit_insn (insn (operand, operand, GEN_INT (count)));
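/* Added example: on a target where two adds are cheaper than a
   constant shift, "x <<= 2" on one word comes out as

       addl  %eax, %eax
       addl  %eax, %eax

   otherwise a single "shll $2, %eax" is emitted.  */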
20030 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
20032 rtx (*gen_ashl3)(rtx, rtx, rtx);
20033 rtx (*gen_shld)(rtx, rtx, rtx);
20034 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20036 rtx low[2], high[2];
20039 if (CONST_INT_P (operands[2]))
20041 split_double_mode (mode, operands, 2, low, high);
20042 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20044 if (count >= half_width)
20046 emit_move_insn (high[0], low[1]);
20047 emit_move_insn (low[0], const0_rtx);
20049 if (count > half_width)
20050 ix86_expand_ashl_const (high[0], count - half_width, mode);
20054 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20056 if (!rtx_equal_p (operands[0], operands[1]))
20057 emit_move_insn (operands[0], operands[1]);
20059 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
20060 ix86_expand_ashl_const (low[0], count, mode);
20065 split_double_mode (mode, operands, 1, low, high);
20067 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20069 if (operands[1] == const1_rtx)
20071 /* Assuming we've chosen QImode-capable registers, then 1 << N
20072 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
20073 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
20075 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
20077 ix86_expand_clear (low[0]);
20078 ix86_expand_clear (high[0]);
20079 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
20081 d = gen_lowpart (QImode, low[0]);
20082 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20083 s = gen_rtx_EQ (QImode, flags, const0_rtx);
20084 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20086 d = gen_lowpart (QImode, high[0]);
20087 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20088 s = gen_rtx_NE (QImode, flags, const0_rtx);
20089 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20092 /* Otherwise, we can get the same results by manually performing
20093 a bit extract operation on bit 5/6, and then performing the two
20094 shifts. The two methods of getting 0/1 into low/high are exactly
20095 the same size. Avoiding the shift in the bit extract case helps
20096 pentium4 a bit; no one else seems to care much either way. */
20099 enum machine_mode half_mode;
20100 rtx (*gen_lshr3)(rtx, rtx, rtx);
20101 rtx (*gen_and3)(rtx, rtx, rtx);
20102 rtx (*gen_xor3)(rtx, rtx, rtx);
20103 HOST_WIDE_INT bits;
20106 if (mode == DImode)
20108 half_mode = SImode;
20109 gen_lshr3 = gen_lshrsi3;
20110 gen_and3 = gen_andsi3;
20111 gen_xor3 = gen_xorsi3;
20116 half_mode = DImode;
20117 gen_lshr3 = gen_lshrdi3;
20118 gen_and3 = gen_anddi3;
20119 gen_xor3 = gen_xordi3;
20123 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
20124 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
20126 x = gen_lowpart (half_mode, operands[2]);
20127 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
20129 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
20130 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
20131 emit_move_insn (low[0], high[0]);
20132 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
20135 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20136 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
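/* In both variants above, exactly one of LOW/HIGH now holds 1, selected
   by bit log2(half_width) of the shift count (bit clear -> LOW, bit set
   -> HIGH).  The two shifts then move that 1 into place; since the
   hardware masks the shift count to the operand width, shifting both
   halves by the full count is safe.  E.g. splitting a 64-bit 1 << 37
   into 32-bit halves: bit 5 of 37 is set, so HIGH = 1, and the SImode
   shift by 37 acts as a shift by 5, making HIGH carry bit 37 of the
   result.  */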
20140 if (operands[1] == constm1_rtx)
20142 /* For -1 << N, we can avoid the shld instruction, because we
20143 know that we're shifting 0...31/63 ones into a -1. */
20144 emit_move_insn (low[0], constm1_rtx);
20145 if (optimize_insn_for_size_p ())
20146 emit_move_insn (high[0], low[0]);
20148 emit_move_insn (high[0], constm1_rtx);
20152 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20154 if (!rtx_equal_p (operands[0], operands[1]))
20155 emit_move_insn (operands[0], operands[1]);
20157 split_double_mode (mode, operands, 1, low, high);
20158 emit_insn (gen_shld (high[0], low[0], operands[2]));
20161 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20163 if (TARGET_CMOVE && scratch)
20165 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20166 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20168 ix86_expand_clear (scratch);
20169 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
20173 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20174 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20176 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
20181 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
20183 rtx (*gen_ashr3)(rtx, rtx, rtx)
20184 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
20185 rtx (*gen_shrd)(rtx, rtx, rtx);
20186 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20188 rtx low[2], high[2];
20191 if (CONST_INT_P (operands[2]))
20193 split_double_mode (mode, operands, 2, low, high);
20194 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20196 if (count == GET_MODE_BITSIZE (mode) - 1)
20198 emit_move_insn (high[0], high[1]);
20199 emit_insn (gen_ashr3 (high[0], high[0],
20200 GEN_INT (half_width - 1)));
20201 emit_move_insn (low[0], high[0]);
20204 else if (count >= half_width)
20206 emit_move_insn (low[0], high[1]);
20207 emit_move_insn (high[0], low[0]);
20208 emit_insn (gen_ashr3 (high[0], high[0],
20209 GEN_INT (half_width - 1)));
20211 if (count > half_width)
20212 emit_insn (gen_ashr3 (low[0], low[0],
20213 GEN_INT (count - half_width)));
20217 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20219 if (!rtx_equal_p (operands[0], operands[1]))
20220 emit_move_insn (operands[0], operands[1]);
20222 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
20223 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
20228 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20230 if (!rtx_equal_p (operands[0], operands[1]))
20231 emit_move_insn (operands[0], operands[1]);
20233 split_double_mode (mode, operands, 1, low, high);
20235 emit_insn (gen_shrd (low[0], high[0], operands[2]));
20236 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
20238 if (TARGET_CMOVE && scratch)
20240 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20241 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20243 emit_move_insn (scratch, high[0]);
20244 emit_insn (gen_ashr3 (scratch, scratch,
20245 GEN_INT (half_width - 1)));
20246 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
20251 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
20252 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
20254 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
20260 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
20262 rtx (*gen_lshr3)(rtx, rtx, rtx)
20263 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
20264 rtx (*gen_shrd)(rtx, rtx, rtx);
20265 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20267 rtx low[2], high[2];
20270 if (CONST_INT_P (operands[2]))
20272 split_double_mode (mode, operands, 2, low, high);
20273 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20275 if (count >= half_width)
20277 emit_move_insn (low[0], high[1]);
20278 ix86_expand_clear (high[0]);
20280 if (count > half_width)
20281 emit_insn (gen_lshr3 (low[0], low[0],
20282 GEN_INT (count - half_width)));
20286 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20288 if (!rtx_equal_p (operands[0], operands[1]))
20289 emit_move_insn (operands[0], operands[1]);
20291 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
20292 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
20297 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20299 if (!rtx_equal_p (operands[0], operands[1]))
20300 emit_move_insn (operands[0], operands[1]);
20302 split_double_mode (mode, operands, 1, low, high);
20304 emit_insn (gen_shrd (low[0], high[0], operands[2]));
20305 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
20307 if (TARGET_CMOVE && scratch)
20309 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20310 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20312 ix86_expand_clear (scratch);
20313 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
20318 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20319 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20321 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
20326 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
20328 predict_jump (int prob)
20330 rtx insn = get_last_insn ();
20331 gcc_assert (JUMP_P (insn));
20332 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
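/* Typical use (illustrative): predict_jump (REG_BR_PROB_BASE * 90 / 100)
   annotates the branch as taken with ~90% probability, which later
   guides basic block reordering toward the expected path.  */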
20335 /* Helper function for the string operations below. Test whether VARIABLE
20336 is aligned to VALUE bytes. If it is, jump to the label. */
20338 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
20340 rtx label = gen_label_rtx ();
20341 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
20342 if (GET_MODE (variable) == DImode)
20343 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
20345 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
20346 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
20349 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20351 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20355 /* Decrease COUNTREG by VALUE. */
20357 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
20359 rtx (*gen_add)(rtx, rtx, rtx)
20360 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
20362 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
20365 /* Zero extend the possibly-SImode EXP to a Pmode register. */
20367 ix86_zero_extend_to_Pmode (rtx exp)
20370 if (GET_MODE (exp) == VOIDmode)
20371 return force_reg (Pmode, exp);
20372 if (GET_MODE (exp) == Pmode)
20373 return copy_to_mode_reg (Pmode, exp);
20374 r = gen_reg_rtx (Pmode);
20375 emit_insn (gen_zero_extendsidi2 (r, exp));
20379 /* Divide COUNTREG by SCALE. */
20381 scale_counter (rtx countreg, int scale)
20387 if (CONST_INT_P (countreg))
20388 return GEN_INT (INTVAL (countreg) / scale);
20389 gcc_assert (REG_P (countreg));
20391 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
20392 GEN_INT (exact_log2 (scale)),
20393 NULL, 1, OPTAB_DIRECT);
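/* SCALE is assumed to be a power of two here (a chunk size of 1, 2, 4
   or 8), hence the logical shift right by exact_log2 (scale) above;
   e.g. scale == 4 becomes a shift right by 2.  */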
20397 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
20398 DImode for constant loop counts. */
20400 static enum machine_mode
20401 counter_mode (rtx count_exp)
20403 if (GET_MODE (count_exp) != VOIDmode)
20404 return GET_MODE (count_exp);
20405 if (!CONST_INT_P (count_exp))
20407 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
20412 /* When SRCPTR is non-NULL, output a simple loop to move the memory
20413 pointed to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
20414 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
20415 equivalent loop to set memory by VALUE (supposed to be in MODE).
20417 The size is rounded down to a whole number of chunks moved at once.
20418 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
20422 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
20423 rtx destptr, rtx srcptr, rtx value,
20424 rtx count, enum machine_mode mode, int unroll,
20427 rtx out_label, top_label, iter, tmp;
20428 enum machine_mode iter_mode = counter_mode (count);
20429 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
20430 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
20436 top_label = gen_label_rtx ();
20437 out_label = gen_label_rtx ();
20438 iter = gen_reg_rtx (iter_mode);
20440 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
20441 NULL, 1, OPTAB_DIRECT);
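/* Illustration: for SImode chunks unrolled 4 times, piece_size is 16 and
   piece_size_mask is ~15, so a count of 70 gives size == 64; the
   remaining 6 bytes are left to the epilogue.  */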
20442 /* Those two should combine. */
20443 if (piece_size == const1_rtx)
20445 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
20447 predict_jump (REG_BR_PROB_BASE * 10 / 100);
20449 emit_move_insn (iter, const0_rtx);
20451 emit_label (top_label);
20453 tmp = convert_modes (Pmode, iter_mode, iter, true);
20454 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
20455 destmem = change_address (destmem, mode, x_addr);
20459 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
20460 srcmem = change_address (srcmem, mode, y_addr);
20462 /* When unrolling for chips that reorder memory reads and writes,
20463 we can save registers by using a single temporary.
20464 Also, using 4 temporaries is overkill in 32-bit mode. */
20465 if (!TARGET_64BIT && 0)
20467 for (i = 0; i < unroll; i++)
20472 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20474 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20476 emit_move_insn (destmem, srcmem);
20482 gcc_assert (unroll <= 4);
20483 for (i = 0; i < unroll; i++)
20485 tmpreg[i] = gen_reg_rtx (mode);
20489 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20491 emit_move_insn (tmpreg[i], srcmem);
20493 for (i = 0; i < unroll; i++)
20498 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20500 emit_move_insn (destmem, tmpreg[i]);
20505 for (i = 0; i < unroll; i++)
20509 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20510 emit_move_insn (destmem, value);
20513 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
20514 true, OPTAB_LIB_WIDEN);
20516 emit_move_insn (iter, tmp);
20518 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
20520 if (expected_size != -1)
20522 expected_size /= GET_MODE_SIZE (mode) * unroll;
20523 if (expected_size == 0)
20525 else if (expected_size > REG_BR_PROB_BASE)
20526 predict_jump (REG_BR_PROB_BASE - 1);
20528 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
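/* Worked example (REG_BR_PROB_BASE is 10000): for an expected chunk
   count of 10, this computes 10000 - (10000 + 5) / 10 == 9000, i.e. the
   back edge is predicted taken ~90% of the time, matching a loop that
   runs about 10 iterations.  */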
20531 predict_jump (REG_BR_PROB_BASE * 80 / 100);
20532 iter = ix86_zero_extend_to_Pmode (iter);
20533 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
20534 true, OPTAB_LIB_WIDEN);
20535 if (tmp != destptr)
20536 emit_move_insn (destptr, tmp);
20539 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
20540 true, OPTAB_LIB_WIDEN);
20542 emit_move_insn (srcptr, tmp);
20544 emit_label (out_label);
20547 /* Output "rep; mov" instruction.
20548 Arguments have the same meaning as for the previous function. */
20550 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
20551 rtx destptr, rtx srcptr,
20553 enum machine_mode mode)
20558 HOST_WIDE_INT rounded_count;
20560 /* If the size is known and a multiple of 4, it is shorter to use 4-byte rep movs. */
20561 if (mode == QImode && CONST_INT_P (count)
20562 && !(INTVAL (count) & 3))
20565 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20566 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20567 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
20568 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
20569 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20570 if (mode != QImode)
20572 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20573 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20574 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20575 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
20576 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20577 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
20581 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20582 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
20584 if (CONST_INT_P (count))
20586 rounded_count = (INTVAL (count)
20587 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20588 destmem = shallow_copy_rtx (destmem);
20589 srcmem = shallow_copy_rtx (srcmem);
20590 set_mem_size (destmem, rounded_count);
20591 set_mem_size (srcmem, rounded_count);
20595 if (MEM_SIZE_KNOWN_P (destmem))
20596 clear_mem_size (destmem);
20597 if (MEM_SIZE_KNOWN_P (srcmem))
20598 clear_mem_size (srcmem);
20600 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
20604 /* Output "rep; stos" instruction.
20605 Arguments have the same meaning as for the previous function. */
20607 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
20608 rtx count, enum machine_mode mode,
20613 HOST_WIDE_INT rounded_count;
20615 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20616 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20617 value = force_reg (mode, gen_lowpart (mode, value));
20618 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20619 if (mode != QImode)
20621 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20622 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20623 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20626 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20627 if (orig_value == const0_rtx && CONST_INT_P (count))
20629 rounded_count = (INTVAL (count)
20630 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20631 destmem = shallow_copy_rtx (destmem);
20632 set_mem_size (destmem, rounded_count);
20634 else if (MEM_SIZE_KNOWN_P (destmem))
20635 clear_mem_size (destmem);
20636 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
20640 emit_strmov (rtx destmem, rtx srcmem,
20641 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
20643 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
20644 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
20645 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20648 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
20650 expand_movmem_epilogue (rtx destmem, rtx srcmem,
20651 rtx destptr, rtx srcptr, rtx count, int max_size)
20654 if (CONST_INT_P (count))
20656 HOST_WIDE_INT countval = INTVAL (count);
20659 if ((countval & 0x10) && max_size > 16)
20663 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20664 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
20667 gcc_unreachable ();
20670 if ((countval & 0x08) && max_size > 8)
20673 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20676 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20677 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20681 if ((countval & 0x04) && max_size > 4)
20683 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20686 if ((countval & 0x02) && max_size > 2)
20688 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20691 if ((countval & 0x01) && max_size > 1)
20693 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
20700 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20701 count, 1, OPTAB_DIRECT);
20702 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20703 count, QImode, 1, 4);
20707 /* When there are stringops, we can cheaply increase dest and src pointers.
20708 Otherwise we save code size by maintaining an offset (zero is readily
20709 available from the preceding rep operation) and using x86 addressing modes. */
20711 if (TARGET_SINGLE_STRINGOP)
20715 rtx label = ix86_expand_aligntest (count, 4, true);
20716 src = change_address (srcmem, SImode, srcptr);
20717 dest = change_address (destmem, SImode, destptr);
20718 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20719 emit_label (label);
20720 LABEL_NUSES (label) = 1;
20724 rtx label = ix86_expand_aligntest (count, 2, true);
20725 src = change_address (srcmem, HImode, srcptr);
20726 dest = change_address (destmem, HImode, destptr);
20727 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20728 emit_label (label);
20729 LABEL_NUSES (label) = 1;
20733 rtx label = ix86_expand_aligntest (count, 1, true);
20734 src = change_address (srcmem, QImode, srcptr);
20735 dest = change_address (destmem, QImode, destptr);
20736 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20737 emit_label (label);
20738 LABEL_NUSES (label) = 1;
20743 rtx offset = force_reg (Pmode, const0_rtx);
20748 rtx label = ix86_expand_aligntest (count, 4, true);
20749 src = change_address (srcmem, SImode, srcptr);
20750 dest = change_address (destmem, SImode, destptr);
20751 emit_move_insn (dest, src);
20752 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20753 true, OPTAB_LIB_WIDEN);
20755 emit_move_insn (offset, tmp);
20756 emit_label (label);
20757 LABEL_NUSES (label) = 1;
20761 rtx label = ix86_expand_aligntest (count, 2, true);
20762 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20763 src = change_address (srcmem, HImode, tmp);
20764 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20765 dest = change_address (destmem, HImode, tmp);
20766 emit_move_insn (dest, src);
20767 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20768 true, OPTAB_LIB_WIDEN);
20770 emit_move_insn (offset, tmp);
20771 emit_label (label);
20772 LABEL_NUSES (label) = 1;
20776 rtx label = ix86_expand_aligntest (count, 1, true);
20777 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20778 src = change_address (srcmem, QImode, tmp);
20779 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20780 dest = change_address (destmem, QImode, tmp);
20781 emit_move_insn (dest, src);
20782 emit_label (label);
20783 LABEL_NUSES (label) = 1;
20788 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20790 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20791 rtx count, int max_size)
20794 expand_simple_binop (counter_mode (count), AND, count,
20795 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20796 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20797 gen_lowpart (QImode, value), count, QImode,
20801 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20803 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20807 if (CONST_INT_P (count))
20809 HOST_WIDE_INT countval = INTVAL (count);
20812 if ((countval & 0x10) && max_size > 16)
20816 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20817 emit_insn (gen_strset (destptr, dest, value));
20818 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20819 emit_insn (gen_strset (destptr, dest, value));
20822 gcc_unreachable ();
20825 if ((countval & 0x08) && max_size > 8)
20829 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20830 emit_insn (gen_strset (destptr, dest, value));
20834 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20835 emit_insn (gen_strset (destptr, dest, value));
20836 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20837 emit_insn (gen_strset (destptr, dest, value));
20841 if ((countval & 0x04) && max_size > 4)
20843 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20844 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20847 if ((countval & 0x02) && max_size > 2)
20849 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20850 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20853 if ((countval & 0x01) && max_size > 1)
20855 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20856 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20863 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20868 rtx label = ix86_expand_aligntest (count, 16, true);
20871 dest = change_address (destmem, DImode, destptr);
20872 emit_insn (gen_strset (destptr, dest, value));
20873 emit_insn (gen_strset (destptr, dest, value));
20877 dest = change_address (destmem, SImode, destptr);
20878 emit_insn (gen_strset (destptr, dest, value));
20879 emit_insn (gen_strset (destptr, dest, value));
20880 emit_insn (gen_strset (destptr, dest, value));
20881 emit_insn (gen_strset (destptr, dest, value));
20883 emit_label (label);
20884 LABEL_NUSES (label) = 1;
20888 rtx label = ix86_expand_aligntest (count, 8, true);
20891 dest = change_address (destmem, DImode, destptr);
20892 emit_insn (gen_strset (destptr, dest, value));
20896 dest = change_address (destmem, SImode, destptr);
20897 emit_insn (gen_strset (destptr, dest, value));
20898 emit_insn (gen_strset (destptr, dest, value));
20900 emit_label (label);
20901 LABEL_NUSES (label) = 1;
20905 rtx label = ix86_expand_aligntest (count, 4, true);
20906 dest = change_address (destmem, SImode, destptr);
20907 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20908 emit_label (label);
20909 LABEL_NUSES (label) = 1;
20913 rtx label = ix86_expand_aligntest (count, 2, true);
20914 dest = change_address (destmem, HImode, destptr);
20915 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20916 emit_label (label);
20917 LABEL_NUSES (label) = 1;
20921 rtx label = ix86_expand_aligntest (count, 1, true);
20922 dest = change_address (destmem, QImode, destptr);
20923 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20924 emit_label (label);
20925 LABEL_NUSES (label) = 1;
20929 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
20930 to DESIRED_ALIGNMENT. */
20932 expand_movmem_prologue (rtx destmem, rtx srcmem,
20933 rtx destptr, rtx srcptr, rtx count,
20934 int align, int desired_alignment)
20936 if (align <= 1 && desired_alignment > 1)
20938 rtx label = ix86_expand_aligntest (destptr, 1, false);
20939 srcmem = change_address (srcmem, QImode, srcptr);
20940 destmem = change_address (destmem, QImode, destptr);
20941 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20942 ix86_adjust_counter (count, 1);
20943 emit_label (label);
20944 LABEL_NUSES (label) = 1;
20946 if (align <= 2 && desired_alignment > 2)
20948 rtx label = ix86_expand_aligntest (destptr, 2, false);
20949 srcmem = change_address (srcmem, HImode, srcptr);
20950 destmem = change_address (destmem, HImode, destptr);
20951 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20952 ix86_adjust_counter (count, 2);
20953 emit_label (label);
20954 LABEL_NUSES (label) = 1;
20956 if (align <= 4 && desired_alignment > 4)
20958 rtx label = ix86_expand_aligntest (destptr, 4, false);
20959 srcmem = change_address (srcmem, SImode, srcptr);
20960 destmem = change_address (destmem, SImode, destptr);
20961 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20962 ix86_adjust_counter (count, 4);
20963 emit_label (label);
20964 LABEL_NUSES (label) = 1;
20966 gcc_assert (desired_alignment <= 8);
20969 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
20970 ALIGN_BYTES is how many bytes need to be copied. */
20972 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20973 int desired_align, int align_bytes)
20976 rtx orig_dst = dst;
20977 rtx orig_src = src;
20979 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20980 if (src_align_bytes >= 0)
20981 src_align_bytes = desired_align - src_align_bytes;
20982 if (align_bytes & 1)
20984 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20985 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20987 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20989 if (align_bytes & 2)
20991 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20992 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20993 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20994 set_mem_align (dst, 2 * BITS_PER_UNIT);
20995 if (src_align_bytes >= 0
20996 && (src_align_bytes & 1) == (align_bytes & 1)
20997 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20998 set_mem_align (src, 2 * BITS_PER_UNIT);
21000 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21002 if (align_bytes & 4)
21004 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21005 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
21006 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21007 set_mem_align (dst, 4 * BITS_PER_UNIT);
21008 if (src_align_bytes >= 0)
21010 unsigned int src_align = 0;
21011 if ((src_align_bytes & 3) == (align_bytes & 3))
21013 else if ((src_align_bytes & 1) == (align_bytes & 1))
21015 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21016 set_mem_align (src, src_align * BITS_PER_UNIT);
21019 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21021 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21022 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
21023 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21024 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21025 if (src_align_bytes >= 0)
21027 unsigned int src_align = 0;
21028 if ((src_align_bytes & 7) == (align_bytes & 7))
21030 else if ((src_align_bytes & 3) == (align_bytes & 3))
21032 else if ((src_align_bytes & 1) == (align_bytes & 1))
21034 if (src_align > (unsigned int) desired_align)
21035 src_align = desired_align;
21036 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21037 set_mem_align (src, src_align * BITS_PER_UNIT);
21039 if (MEM_SIZE_KNOWN_P (orig_dst))
21040 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21041 if (MEM_SIZE_KNOWN_P (orig_src))
21042 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
21047 /* Store enough into DEST to align DEST, known to be aligned by ALIGN, to
21048 DESIRED_ALIGNMENT. */
21050 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
21051 int align, int desired_alignment)
21053 if (align <= 1 && desired_alignment > 1)
21055 rtx label = ix86_expand_aligntest (destptr, 1, false);
21056 destmem = change_address (destmem, QImode, destptr);
21057 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
21058 ix86_adjust_counter (count, 1);
21059 emit_label (label);
21060 LABEL_NUSES (label) = 1;
21062 if (align <= 2 && desired_alignment > 2)
21064 rtx label = ix86_expand_aligntest (destptr, 2, false);
21065 destmem = change_address (destmem, HImode, destptr);
21066 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
21067 ix86_adjust_counter (count, 2);
21068 emit_label (label);
21069 LABEL_NUSES (label) = 1;
21071 if (align <= 4 && desired_alignment > 4)
21073 rtx label = ix86_expand_aligntest (destptr, 4, false);
21074 destmem = change_address (destmem, SImode, destptr);
21075 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
21076 ix86_adjust_counter (count, 4);
21077 emit_label (label);
21078 LABEL_NUSES (label) = 1;
21080 gcc_assert (desired_alignment <= 8);
21083 /* Store enough into DST to align DST, known to be aligned by ALIGN, to
21084 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
21086 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
21087 int desired_align, int align_bytes)
21090 rtx orig_dst = dst;
21091 if (align_bytes & 1)
21093 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21095 emit_insn (gen_strset (destreg, dst,
21096 gen_lowpart (QImode, value)));
21098 if (align_bytes & 2)
21100 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21101 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21102 set_mem_align (dst, 2 * BITS_PER_UNIT);
21104 emit_insn (gen_strset (destreg, dst,
21105 gen_lowpart (HImode, value)));
21107 if (align_bytes & 4)
21109 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21110 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21111 set_mem_align (dst, 4 * BITS_PER_UNIT);
21113 emit_insn (gen_strset (destreg, dst,
21114 gen_lowpart (SImode, value)));
21116 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21117 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21118 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21119 if (MEM_SIZE_KNOWN_P (orig_dst))
21120 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21124 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
21125 static enum stringop_alg
21126 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
21127 int *dynamic_check)
21129 const struct stringop_algs * algs;
21130 bool optimize_for_speed;
21131 /* Algorithms using the rep prefix want at least edi and ecx;
21132 additionally, memset wants eax and memcpy wants esi. Don't
21133 consider such algorithms if the user has appropriated those
21134 registers for their own purposes. */
21135 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
21137 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
21139 #define ALG_USABLE_P(alg) (rep_prefix_usable \
21140 || (alg != rep_prefix_1_byte \
21141 && alg != rep_prefix_4_byte \
21142 && alg != rep_prefix_8_byte))
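/* Example (illustrative): compiling with -ffixed-ecx sets
   fixed_regs[CX_REG], so rep_prefix_usable is false and ALG_USABLE_P
   rejects all three rep-prefix algorithms, leaving only the loop and
   libcall variants.  */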
21143 const struct processor_costs *cost;
21145 /* Even if the string operation call is cold, we still might spend a lot
21146 of time processing large blocks. */
21147 if (optimize_function_for_size_p (cfun)
21148 || (optimize_insn_for_size_p ()
21149 && expected_size != -1 && expected_size < 256))
21150 optimize_for_speed = false;
21152 optimize_for_speed = true;
21154 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
21156 *dynamic_check = -1;
21158 algs = &cost->memset[TARGET_64BIT != 0];
21160 algs = &cost->memcpy[TARGET_64BIT != 0];
21161 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
21162 return ix86_stringop_alg;
21163 /* rep; movq or rep; movl is the smallest variant. */
21164 else if (!optimize_for_speed)
21166 if (!count || (count & 3))
21167 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
21169 return rep_prefix_usable ? rep_prefix_4_byte : loop;
21171 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
21173 else if (expected_size != -1 && expected_size < 4)
21174 return loop_1_byte;
21175 else if (expected_size != -1)
21178 enum stringop_alg alg = libcall;
21179 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
21181 /* We get here if the algorithms that were not libcall-based
21182 were rep-prefix based and we are unable to use rep prefixes
21183 based on global register usage. Break out of the loop and
21184 use the heuristic below. */
21185 if (algs->size[i].max == 0)
21187 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
21189 enum stringop_alg candidate = algs->size[i].alg;
21191 if (candidate != libcall && ALG_USABLE_P (candidate))
21193 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
21194 last non-libcall inline algorithm. */
21195 if (TARGET_INLINE_ALL_STRINGOPS)
21197 /* When the current size is best copied by a libcall,
21198 but we are still forced to inline, run the heuristic below
21199 that will pick code for medium-sized blocks. */
21200 if (alg != libcall)
21204 else if (ALG_USABLE_P (candidate))
21208 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
21210 /* When asked to inline the call anyway, try to pick a meaningful choice.
21211 We look for the maximal size of block that is faster to copy by hand
21212 and take blocks of at most that size, guessing that the average size
21213 will be roughly half of the block size.
21215 If this turns out to be bad, we might simply specify the preferred
21216 choice in ix86_costs. */
21217 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21218 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
21221 enum stringop_alg alg;
21223 bool any_alg_usable_p = true;
21225 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
21227 enum stringop_alg candidate = algs->size[i].alg;
21228 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
21230 if (candidate != libcall && candidate
21231 && ALG_USABLE_P (candidate))
21232 max = algs->size[i].max;
21234 /* If there aren't any usable algorithms, then recursing on
21235 smaller sizes isn't going to find anything. Just return the
21236 simple byte-at-a-time copy loop. */
21237 if (!any_alg_usable_p)
21239 /* Pick something reasonable. */
21240 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21241 *dynamic_check = 128;
21242 return loop_1_byte;
21246 alg = decide_alg (count, max / 2, memset, dynamic_check);
21247 gcc_assert (*dynamic_check == -1);
21248 gcc_assert (alg != libcall);
21249 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21250 *dynamic_check = max;
21253 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
21254 #undef ALG_USABLE_P
21257 /* Decide on alignment. We know that the operand is already aligned to ALIGN
21258 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
21260 decide_alignment (int align,
21261 enum stringop_alg alg,
21264 int desired_align = 0;
21268 gcc_unreachable ();
21270 case unrolled_loop:
21271 desired_align = GET_MODE_SIZE (Pmode);
21273 case rep_prefix_8_byte:
21276 case rep_prefix_4_byte:
21277 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
21278 copying whole cache lines at once. */
21279 if (TARGET_PENTIUMPRO)
21284 case rep_prefix_1_byte:
21285 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
21286 copying whole cache lines at once. */
21287 if (TARGET_PENTIUMPRO)
21301 if (desired_align < align)
21302 desired_align = align;
21303 if (expected_size != -1 && expected_size < 4)
21304 desired_align = align;
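/* Illustration: for the loop algorithms on a 64-bit target the switch
   above picks GET_MODE_SIZE (Pmode) == 8; if ALIGN is already 16 the
   first test above keeps 16, and for an expected size below 4 bytes an
   alignment prologue cannot pay off, so ALIGN is returned unchanged.  */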
21305 return desired_align;
21308 /* Return the smallest power of 2 greater than VAL. */
21310 smallest_pow2_greater_than (int val)
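{
  /* A minimal sketch of the usual implementation: keep doubling until
     VAL is exceeded, so e.g. 15 -> 16 and 16 -> 32.  */
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}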
21318 /* Expand string move (memcpy) operation. Use i386 string operations
21319 when profitable. expand_setmem contains similar code. The code
21320 depends upon architecture, block size and alignment, but always has
21321 the same overall structure:
21323 1) Prologue guard: Conditional that jumps up to epilogues for small
21324 blocks that can be handled by epilogue alone. This is faster
21325 but also needed for correctness, since the prologue assumes the block
21326 is larger than the desired alignment.
21328 Optional dynamic check for size and libcall for large
21329 blocks is emitted here too, with -minline-stringops-dynamically.
21331 2) Prologue: copy first few bytes in order to get destination
21332 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
21333 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
21334 copied. We emit either a jump tree on power of two sized
21335 blocks, or a byte loop.
21337 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
21338 with specified algorithm.
21340 4) Epilogue: code copying tail of the block that is too small to be
21341 handled by main body (or up to size guarded by prologue guard). */
21344 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
21345 rtx expected_align_exp, rtx expected_size_exp)
21351 rtx jump_around_label = NULL;
21352 HOST_WIDE_INT align = 1;
21353 unsigned HOST_WIDE_INT count = 0;
21354 HOST_WIDE_INT expected_size = -1;
21355 int size_needed = 0, epilogue_size_needed;
21356 int desired_align = 0, align_bytes = 0;
21357 enum stringop_alg alg;
21359 bool need_zero_guard = false;
21361 if (CONST_INT_P (align_exp))
21362 align = INTVAL (align_exp);
21363 /* i386 can do misaligned access at reasonably increased cost. */
21364 if (CONST_INT_P (expected_align_exp)
21365 && INTVAL (expected_align_exp) > align)
21366 align = INTVAL (expected_align_exp);
21367 /* ALIGN is the minimum of destination and source alignment, but we care here
21368 just about destination alignment. */
21369 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
21370 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
21372 if (CONST_INT_P (count_exp))
21373 count = expected_size = INTVAL (count_exp);
21374 if (CONST_INT_P (expected_size_exp) && count == 0)
21375 expected_size = INTVAL (expected_size_exp);
21377 /* Make sure we don't need to care about overflow later on. */
21378 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21381 /* Step 0: Decide on preferred algorithm, desired alignment and
21382 size of chunks to be copied by main loop. */
21384 alg = decide_alg (count, expected_size, false, &dynamic_check);
21385 desired_align = decide_alignment (align, alg, expected_size);
21387 if (!TARGET_ALIGN_STRINGOPS)
21388 align = desired_align;
21390 if (alg == libcall)
21392 gcc_assert (alg != no_stringop);
21394 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
21395 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21396 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
21401 gcc_unreachable ();
21403 need_zero_guard = true;
21404 size_needed = GET_MODE_SIZE (Pmode);
21406 case unrolled_loop:
21407 need_zero_guard = true;
21408 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
21410 case rep_prefix_8_byte:
21413 case rep_prefix_4_byte:
21416 case rep_prefix_1_byte:
21420 need_zero_guard = true;
21425 epilogue_size_needed = size_needed;
21427 /* Step 1: Prologue guard. */
21429 /* Alignment code needs count to be in register. */
21430 if (CONST_INT_P (count_exp) && desired_align > align)
21432 if (INTVAL (count_exp) > desired_align
21433 && INTVAL (count_exp) > size_needed)
21436 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21437 if (align_bytes <= 0)
21440 align_bytes = desired_align - align_bytes;
21442 if (align_bytes == 0)
21443 count_exp = force_reg (counter_mode (count_exp), count_exp);
21445 gcc_assert (desired_align >= 1 && align >= 1);
21447 /* Ensure that alignment prologue won't copy past end of block. */
21448 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21450 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21451 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21452 Make sure it is a power of 2. */
21453 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
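/* Illustration: with size_needed == 16, align == 1 and desired_align == 8,
   this is MAX (15, 7) == 15, rounded up to the power of two 16 by
   smallest_pow2_greater_than.  */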
21457 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21459 /* If main algorithm works on QImode, no epilogue is needed.
21460 For small sizes just don't align anything. */
21461 if (size_needed == 1)
21462 desired_align = align;
21469 label = gen_label_rtx ();
21470 emit_cmp_and_jump_insns (count_exp,
21471 GEN_INT (epilogue_size_needed),
21472 LTU, 0, counter_mode (count_exp), 1, label);
21473 if (expected_size == -1 || expected_size < epilogue_size_needed)
21474 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21476 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21480 /* Emit code to decide at runtime whether a library call or inline code should be used. */
21482 if (dynamic_check != -1)
21484 if (CONST_INT_P (count_exp))
21486 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
21488 emit_block_move_via_libcall (dst, src, count_exp, false);
21489 count_exp = const0_rtx;
21495 rtx hot_label = gen_label_rtx ();
21496 jump_around_label = gen_label_rtx ();
21497 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21498 LEU, 0, GET_MODE (count_exp), 1, hot_label);
21499 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21500 emit_block_move_via_libcall (dst, src, count_exp, false);
21501 emit_jump (jump_around_label);
21502 emit_label (hot_label);
21506 /* Step 2: Alignment prologue. */
21508 if (desired_align > align)
21510 if (align_bytes == 0)
21512 /* Except for the first move in the epilogue, we no longer know
21513 the constant offset in aliasing info. It doesn't seem worth
21514 the pain to maintain it for the first move, so throw away the info early. */
21516 src = change_address (src, BLKmode, srcreg);
21517 dst = change_address (dst, BLKmode, destreg);
21518 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
21523 /* If we know how many bytes need to be stored before dst is
21524 sufficiently aligned, maintain aliasing info accurately. */
21525 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
21526 desired_align, align_bytes);
21527 count_exp = plus_constant (count_exp, -align_bytes);
21528 count -= align_bytes;
21530 if (need_zero_guard
21531 && (count < (unsigned HOST_WIDE_INT) size_needed
21532 || (align_bytes == 0
21533 && count < ((unsigned HOST_WIDE_INT) size_needed
21534 + desired_align - align))))
21536 /* It is possible that we copied enough so the main loop will not iterate. */
21538 gcc_assert (size_needed > 1);
21539 if (label == NULL_RTX)
21540 label = gen_label_rtx ();
21541 emit_cmp_and_jump_insns (count_exp,
21542 GEN_INT (size_needed),
21543 LTU, 0, counter_mode (count_exp), 1, label);
21544 if (expected_size == -1
21545 || expected_size < (desired_align - align) / 2 + size_needed)
21546 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21548 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21551 if (label && size_needed == 1)
21553 emit_label (label);
21554 LABEL_NUSES (label) = 1;
21556 epilogue_size_needed = 1;
21558 else if (label == NULL_RTX)
21559 epilogue_size_needed = size_needed;
21561 /* Step 3: Main loop. */
21567 gcc_unreachable ();
21569 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21570 count_exp, QImode, 1, expected_size);
21573 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21574 count_exp, Pmode, 1, expected_size);
21576 case unrolled_loop:
21577 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
21578 registers for 4 temporaries anyway. */
21579 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21580 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
21583 case rep_prefix_8_byte:
21584 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21587 case rep_prefix_4_byte:
21588 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21591 case rep_prefix_1_byte:
21592 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21596 /* Properly adjust the offsets of the src and dest memory for aliasing. */
21597 if (CONST_INT_P (count_exp))
21599 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
21600 (count / size_needed) * size_needed);
21601 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21602 (count / size_needed) * size_needed);
21606 src = change_address (src, BLKmode, srcreg);
21607 dst = change_address (dst, BLKmode, destreg);
21610 /* Step 4: Epilogue to copy the remaining bytes. */
21614 /* When the main loop is done, COUNT_EXP might hold original count,
21615 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21616 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21617 bytes. Compensate if needed. */
21619 if (size_needed < epilogue_size_needed)
21622 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21623 GEN_INT (size_needed - 1), count_exp, 1,
21625 if (tmp != count_exp)
21626 emit_move_insn (count_exp, tmp);
21628 emit_label (label);
21629 LABEL_NUSES (label) = 1;
21632 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21633 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
21634 epilogue_size_needed);
21635 if (jump_around_label)
21636 emit_label (jump_around_label);
21640 /* Helper function for memset. For QImode value 0xXY, produce
21641 0xXYXYXYXY of the width specified by MODE. This is essentially
21642 VAL * 0x01010101, but we can do slightly better than
21643 synth_mult by unwinding the sequence by hand on CPUs with slow multiply. */
21646 promote_duplicated_reg (enum machine_mode mode, rtx val)
21648 enum machine_mode valmode = GET_MODE (val);
21650 int nops = mode == DImode ? 3 : 2;
21652 gcc_assert (mode == SImode || mode == DImode);
21653 if (val == const0_rtx)
21654 return copy_to_mode_reg (mode, const0_rtx);
21655 if (CONST_INT_P (val))
21657 HOST_WIDE_INT v = INTVAL (val) & 255;
21661 if (mode == DImode)
21662 v |= (v << 16) << 16;
21663 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
21666 if (valmode == VOIDmode)
21668 if (valmode != QImode)
21669 val = gen_lowpart (QImode, val);
21670 if (mode == QImode)
21672 if (!TARGET_PARTIAL_REG_STALL)
21674 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
21675 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21676 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21677 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21679 rtx reg = convert_modes (mode, QImode, val, true);
21680 tmp = promote_duplicated_reg (mode, const1_rtx);
21681 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21686 rtx reg = convert_modes (mode, QImode, val, true);
21688 if (!TARGET_PARTIAL_REG_STALL)
21689 if (mode == SImode)
21690 emit_insn (gen_movsi_insv_1 (reg, reg));
21692 emit_insn (gen_movdi_insv_1 (reg, reg));
21695 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21696 NULL, 1, OPTAB_DIRECT);
21698 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21700 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21701 NULL, 1, OPTAB_DIRECT);
21702 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21703 if (mode == SImode)
21705 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21706 NULL, 1, OPTAB_DIRECT);
21707 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
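/* Illustration of the shift/IOR ladder above for SImode and VAL == 0xXY:
       0x000000XY | (0x000000XY << 8)  == 0x0000XYXY
       0x0000XYXY | (0x0000XYXY << 16) == 0xXYXYXYXY
   DImode performs one further step with a shift by 32.  */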
21712 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
21713 will be needed by the main loop storing SIZE_NEEDED chunks and by the
21714 prologue getting the alignment from ALIGN to DESIRED_ALIGN. */
21716 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
21721 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21722 promoted_val = promote_duplicated_reg (DImode, val);
21723 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21724 promoted_val = promote_duplicated_reg (SImode, val);
21725 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21726 promoted_val = promote_duplicated_reg (HImode, val);
21728 promoted_val = val;
21730 return promoted_val;
21733 /* Expand string set operation (memset). Use i386 string operations when
21734 profitable. See the expand_movmem comment for an explanation of the
21735 individual steps performed. */
21737 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21738 rtx expected_align_exp, rtx expected_size_exp)
21743 rtx jump_around_label = NULL;
21744 HOST_WIDE_INT align = 1;
21745 unsigned HOST_WIDE_INT count = 0;
21746 HOST_WIDE_INT expected_size = -1;
21747 int size_needed = 0, epilogue_size_needed;
21748 int desired_align = 0, align_bytes = 0;
21749 enum stringop_alg alg;
21750 rtx promoted_val = NULL;
21751 bool force_loopy_epilogue = false;
21753 bool need_zero_guard = false;
21755 if (CONST_INT_P (align_exp))
21756 align = INTVAL (align_exp);
21757 /* i386 can do misaligned access at reasonably increased cost. */
21758 if (CONST_INT_P (expected_align_exp)
21759 && INTVAL (expected_align_exp) > align)
21760 align = INTVAL (expected_align_exp);
21761 if (CONST_INT_P (count_exp))
21762 count = expected_size = INTVAL (count_exp);
21763 if (CONST_INT_P (expected_size_exp) && count == 0)
21764 expected_size = INTVAL (expected_size_exp);
21766 /* Make sure we don't need to care about overflow later on. */
21767 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21770 /* Step 0: Decide on preferred algorithm, desired alignment and
21771 size of chunks to be copied by main loop. */
21773 alg = decide_alg (count, expected_size, true, &dynamic_check);
21774 desired_align = decide_alignment (align, alg, expected_size);
21776 if (!TARGET_ALIGN_STRINGOPS)
21777 align = desired_align;
21779 if (alg == libcall)
21781 gcc_assert (alg != no_stringop);
21783 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21784 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21789 gcc_unreachable ();
21791 need_zero_guard = true;
21792 size_needed = GET_MODE_SIZE (Pmode);
21794 case unrolled_loop:
21795 need_zero_guard = true;
21796 size_needed = GET_MODE_SIZE (Pmode) * 4;
21798 case rep_prefix_8_byte:
21801 case rep_prefix_4_byte:
21804 case rep_prefix_1_byte:
21808 need_zero_guard = true;
21812 epilogue_size_needed = size_needed;
21814 /* Step 1: Prologue guard. */
21816 /* Alignment code needs count to be in register. */
21817 if (CONST_INT_P (count_exp) && desired_align > align)
21819 if (INTVAL (count_exp) > desired_align
21820 && INTVAL (count_exp) > size_needed)
21823 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21824 if (align_bytes <= 0)
21827 align_bytes = desired_align - align_bytes;
21829 if (align_bytes == 0)
21831 enum machine_mode mode = SImode;
21832 if (TARGET_64BIT && (count & ~0xffffffff))
21834 count_exp = force_reg (mode, count_exp);
21837 /* Do the cheap promotion to allow better CSE across the
21838 main loop and epilogue (i.e. one load of the big constant in
21839 front of all the code). */
21840 if (CONST_INT_P (val_exp))
21841 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21842 desired_align, align);
21843 /* Ensure that alignment prologue won't copy past end of block. */
21844 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21846 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21847 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21848 Make sure it is power of 2. */
21849 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21851 /* To improve performance of small blocks, we jump around the VAL
21852 promotion. This means that if the promoted VAL is not constant,
21853 we might not use it in the epilogue and have to use a byte loop variant. */
21855 if (epilogue_size_needed > 2 && !promoted_val)
21856 force_loopy_epilogue = true;
21859 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21861 /* If main algorithm works on QImode, no epilogue is needed.
21862 For small sizes just don't align anything. */
21863 if (size_needed == 1)
21864 desired_align = align;
21871 label = gen_label_rtx ();
21872 emit_cmp_and_jump_insns (count_exp,
21873 GEN_INT (epilogue_size_needed),
21874 LTU, 0, counter_mode (count_exp), 1, label);
21875 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21876 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21878 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21881 if (dynamic_check != -1)
21883 rtx hot_label = gen_label_rtx ();
21884 jump_around_label = gen_label_rtx ();
21885 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21886 LEU, 0, counter_mode (count_exp), 1, hot_label);
21887 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21888 set_storage_via_libcall (dst, count_exp, val_exp, false);
21889 emit_jump (jump_around_label);
21890 emit_label (hot_label);
21893 /* Step 2: Alignment prologue. */
21895 /* Do the expensive promotion once we branched off the small blocks. */
21897 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21898 desired_align, align);
21899 gcc_assert (desired_align >= 1 && align >= 1);
21901 if (desired_align > align)
21903 if (align_bytes == 0)
21905 /* Except for the first move in the epilogue, we no longer know
21906 the constant offset in aliasing info. It doesn't seem worth
21907 the pain to maintain it for the first move, so throw away the info early. */
21909 dst = change_address (dst, BLKmode, destreg);
21910 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21915 /* If we know how many bytes need to be stored before dst is
21916 sufficiently aligned, maintain aliasing info accurately. */
21917 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21918 desired_align, align_bytes);
21919 count_exp = plus_constant (count_exp, -align_bytes);
21920 count -= align_bytes;
21922 if (need_zero_guard
21923 && (count < (unsigned HOST_WIDE_INT) size_needed
21924 || (align_bytes == 0
21925 && count < ((unsigned HOST_WIDE_INT) size_needed
21926 + desired_align - align))))
21928 /* It is possible that we copied enough so the main loop will not iterate. */
21930 gcc_assert (size_needed > 1);
21931 if (label == NULL_RTX)
21932 label = gen_label_rtx ();
21933 emit_cmp_and_jump_insns (count_exp,
21934 GEN_INT (size_needed),
21935 LTU, 0, counter_mode (count_exp), 1, label);
21936 if (expected_size == -1
21937 || expected_size < (desired_align - align) / 2 + size_needed)
21938 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21940 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21943 if (label && size_needed == 1)
21945 emit_label (label);
21946 LABEL_NUSES (label) = 1;
21948 promoted_val = val_exp;
21949 epilogue_size_needed = 1;
21951 else if (label == NULL_RTX)
21952 epilogue_size_needed = size_needed;
21954 /* Step 3: Main loop. */
21960 gcc_unreachable ();
21962 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21963 count_exp, QImode, 1, expected_size);
21966 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21967 count_exp, Pmode, 1, expected_size);
21969 case unrolled_loop:
21970 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21971 count_exp, Pmode, 4, expected_size);
21973 case rep_prefix_8_byte:
21974 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21977 case rep_prefix_4_byte:
21978 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21981 case rep_prefix_1_byte:
21982 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21986 /* Properly adjust the offset of the dest memory for aliasing. */
21987 if (CONST_INT_P (count_exp))
21988 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21989 (count / size_needed) * size_needed);
21991 dst = change_address (dst, BLKmode, destreg);
21993 /* Step 4: Epilogue to copy the remaining bytes. */
21997 /* When the main loop is done, COUNT_EXP might hold original count,
21998 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21999 Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
22000 bytes. Compensate if needed. */
22002 if (size_needed < epilogue_size_needed)
22005 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22006 GEN_INT (size_needed - 1), count_exp, 1,
22008 if (tmp != count_exp)
22009 emit_move_insn (count_exp, tmp);
22011 emit_label (label);
22012 LABEL_NUSES (label) = 1;
22015 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22017 if (force_loopy_epilogue)
22018 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
22019 epilogue_size_needed);
22021 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
22022 epilogue_size_needed);
22024 if (jump_around_label)
22025 emit_label (jump_around_label);
22029 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb.
22032 out = result, initialized with the start address
22033 align_rtx = alignment of the address.
22034 scratch = scratch register, initialized with the start address when
22035 not aligned, otherwise undefined
22037 This is just the body. It needs the initializations mentioned above and
22038 some address computing at the end. These things are done in i386.md. */
22041 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
22045 rtx align_2_label = NULL_RTX;
22046 rtx align_3_label = NULL_RTX;
22047 rtx align_4_label = gen_label_rtx ();
22048 rtx end_0_label = gen_label_rtx ();
22050 rtx tmpreg = gen_reg_rtx (SImode);
22051 rtx scratch = gen_reg_rtx (SImode);
22055 if (CONST_INT_P (align_rtx))
22056 align = INTVAL (align_rtx);
22058 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
22060 /* Is there a known alignment and is it less than 4? */
22063 rtx scratch1 = gen_reg_rtx (Pmode);
22064 emit_move_insn (scratch1, out);
22065 /* Is there a known alignment and is it not 2? */
22068 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
22069 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
22071 /* Leave just the 3 lower bits. */
22072 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
22073 NULL_RTX, 0, OPTAB_WIDEN);
22075 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22076 Pmode, 1, align_4_label);
22077 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
22078 Pmode, 1, align_2_label);
22079 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
22080 Pmode, 1, align_3_label);
22084 /* Since the alignment is 2, we have to check 2 or 0 bytes;
22085 check whether it is aligned to a 4-byte boundary. */
22087 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
22088 NULL_RTX, 0, OPTAB_WIDEN);
22090 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22091 Pmode, 1, align_4_label);
22094 mem = change_address (src, QImode, out);
22096 /* Now compare the bytes. */
22098 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
22099 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
22100 QImode, 1, end_0_label);
22102 /* Increment the address. */
22103 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22105 /* Not needed with an alignment of 2. */
22108 emit_label (align_2_label);
22110 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22113 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22115 emit_label (align_3_label);
22118 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22121 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22124 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
22125 align this loop: it only bloats the program without helping convergence.
22127 emit_label (align_4_label);
22129 mem = change_address (src, SImode, out);
22130 emit_move_insn (scratch, mem);
22131 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
22133 /* This formula yields a nonzero result iff one of the bytes is zero.
22134 This saves three branches inside the loop and many cycles. */
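/* Illustrative trace of t = (v - 0x01010101) & ~v & 0x80808080: for
   v = 0x12003456 (byte 2 is zero), v - 0x01010101 == 0x10ff3355 and
   ~v == 0xedffcba9, so t == 0x00800000 -- the surviving 0x80 bit marks
   the zero byte.  For v = 0x12343456 (no zero byte), t == 0.  */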
22136 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
22137 emit_insn (gen_one_cmplsi2 (scratch, scratch));
22138 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
22139 emit_insn (gen_andsi3 (tmpreg, tmpreg,
22140 gen_int_mode (0x80808080, SImode)));
22141 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
22146 rtx reg = gen_reg_rtx (SImode);
22147 rtx reg2 = gen_reg_rtx (Pmode);
22148 emit_move_insn (reg, tmpreg);
22149 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
22151 /* If zero is not in the first two bytes, move two bytes forward. */
22152 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22153 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22154 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22155 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
22156 gen_rtx_IF_THEN_ELSE (SImode, tmp,
22159 /* Emit lea manually to avoid clobbering of flags. */
22160 emit_insn (gen_rtx_SET (SImode, reg2,
22161 gen_rtx_PLUS (Pmode, out, const2_rtx)));
22163 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22164 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22165 emit_insn (gen_rtx_SET (VOIDmode, out,
22166 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
22172 rtx end_2_label = gen_label_rtx ();
22173 /* Is zero in the first two bytes? */
22175 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22176 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22177 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
22178 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22179 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
22181 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
22182 JUMP_LABEL (tmp) = end_2_label;
22184 /* Not in the first two. Move two bytes forward. */
22185 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
22186 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
22188 emit_label (end_2_label);
22192 /* Avoid a branch when fixing up the byte. */
22193 tmpreg = gen_lowpart (QImode, tmpreg);
22194 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
22195 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
22196 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
22197 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
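/* How the branchless fixup above works: OUT points 4 bytes past the
   two-byte group holding the zero, and bit 7 of TMPREG's low byte is set
   iff the first byte of that group is the zero.  Adding TMPREG to itself
   copies that bit into the carry flag, so the subtract-with-borrow computes
   OUT - 3 - CF: OUT - 4 for the first byte, OUT - 3 for the second --
   exactly the address of the zero byte.  */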
22199 emit_label (end_0_label);
22202 /* Expand strlen. */
22205 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
22207 rtx addr, scratch1, scratch2, scratch3, scratch4;
22209 /* The generic case of the strlen expander is long. Avoid expanding it
22210 unless TARGET_INLINE_ALL_STRINGOPS. */
22212 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
22213 && !TARGET_INLINE_ALL_STRINGOPS
22214 && !optimize_insn_for_size_p ()
22215 && (!CONST_INT_P (align) || INTVAL (align) < 4))
22218 addr = force_reg (Pmode, XEXP (src, 0));
22219 scratch1 = gen_reg_rtx (Pmode);
22221 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
22222 && !optimize_insn_for_size_p ())
22224 /* Well it seems that some optimizer does not combine a call like
22225 foo(strlen(bar), strlen(bar));
22226 when the move and the subtraction are done here. It does calculate
22227 the length just once when these instructions are done inside of
22228 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
22229 often used and I use one fewer register for the lifetime of
22230 output_strlen_unroll() this is better. */
22232 emit_move_insn (out, addr);
22234 ix86_expand_strlensi_unroll_1 (out, src, align);
22236 /* strlensi_unroll_1 returns the address of the zero at the end of
22237 the string, like memchr(), so compute the length by subtracting
22238 the start address. */
22239 emit_insn (ix86_gen_sub3 (out, out, addr));
22245 /* Can't use this if the user has appropriated eax, ecx, or edi. */
22246 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
22249 scratch2 = gen_reg_rtx (Pmode);
22250 scratch3 = gen_reg_rtx (Pmode);
22251 scratch4 = force_reg (Pmode, constm1_rtx);
22253 emit_move_insn (scratch3, addr);
22254 eoschar = force_reg (QImode, eoschar);
22256 src = replace_equiv_address_nv (src, scratch3);
22258 /* If .md starts supporting :P, this can be done in .md. */
22259 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
22260 scratch4), UNSPEC_SCAS);
22261 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
22262 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
22263 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
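/* Illustrative arithmetic for the scas sequence above: repnz scasb
   decrements the count register once per byte examined, including the
   terminator.  For the string "ab" (length 2), scratch4 starts at -1 and
   three bytes are scanned, leaving -4; the one's complement gives 3, and
   adding -1 yields the length, 2.  */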
22268 /* For a given symbol (function), construct code to compute the address of its PLT
22269 entry in the large x86-64 PIC model. */
22271 construct_plt_address (rtx symbol)
22273 rtx tmp = gen_reg_rtx (Pmode);
22274 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
22276 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
22277 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
22279 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
22280 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
22285 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
22287 rtx pop, bool sibcall)
22289 /* We need to represent that SI and DI registers are clobbered by SYSV calls. */
22291 static int clobbered_registers[] = {
22292 XMM6_REG, XMM7_REG, XMM8_REG,
22293 XMM9_REG, XMM10_REG, XMM11_REG,
22294 XMM12_REG, XMM13_REG, XMM14_REG,
22295 XMM15_REG, SI_REG, DI_REG
22297 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
22298 rtx use = NULL, call;
22299 unsigned int vec_len;
22301 if (pop == const0_rtx)
22303 gcc_assert (!TARGET_64BIT || !pop);
22305 if (TARGET_MACHO && !TARGET_64BIT)
22308 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
22309 fnaddr = machopic_indirect_call_target (fnaddr);
22314 /* Static functions and indirect calls don't need the pic register. */
22315 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
22316 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
22317 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
22318 use_reg (&use, pic_offset_table_rtx);
22321 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
22323 rtx al = gen_rtx_REG (QImode, AX_REG);
22324 emit_move_insn (al, callarg2);
22325 use_reg (&use, al);
22328 if (ix86_cmodel == CM_LARGE_PIC
22330 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
22331 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
22332 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
22334 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
22335 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
22337 fnaddr = XEXP (fnaddr, 0);
22338 if (GET_MODE (fnaddr) != Pmode)
22339 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
22340 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
22344 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
22346 call = gen_rtx_SET (VOIDmode, retval, call);
22347 vec[vec_len++] = call;
22351 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
22352 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
22353 vec[vec_len++] = pop;
22356 if (TARGET_64BIT_MS_ABI
22357 && (!callarg2 || INTVAL (callarg2) != -2))
22361 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
22362 UNSPEC_MS_TO_SYSV_CALL);
22364 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
22366 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
22368 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
22370 clobbered_registers[i]));
22373 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
22374 if (TARGET_VZEROUPPER)
22377 if (cfun->machine->callee_pass_avx256_p)
22379 if (cfun->machine->callee_return_avx256_p)
22380 avx256 = callee_return_pass_avx256;
22382 avx256 = callee_pass_avx256;
22384 else if (cfun->machine->callee_return_avx256_p)
22385 avx256 = callee_return_avx256;
22387 avx256 = call_no_avx256;
22389 if (reload_completed)
22390 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
22392 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
22393 gen_rtvec (1, GEN_INT (avx256)),
22394 UNSPEC_CALL_NEEDS_VZEROUPPER);
22398 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
22399 call = emit_call_insn (call);
22401 CALL_INSN_FUNCTION_USAGE (call) = use;
22407 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
22409 rtx pat = PATTERN (insn);
22410 rtvec vec = XVEC (pat, 0);
22411 int len = GET_NUM_ELEM (vec) - 1;
22413 /* Strip off the last entry of the parallel. */
22414 gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
22415 gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
22417 pat = RTVEC_ELT (vec, 0);
22419 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
22421 emit_insn (gen_avx_vzeroupper (vzeroupper));
22422 emit_call_insn (pat);
22425 /* Output the assembly for a call instruction. */
22428 ix86_output_call_insn (rtx insn, rtx call_op)
22430 bool direct_p = constant_call_address_operand (call_op, Pmode);
22431 bool seh_nop_p = false;
22434 if (SIBLING_CALL_P (insn))
22438 /* SEH epilogue detection requires the indirect branch case
22439 to include REX.W. */
22440 else if (TARGET_SEH)
22441 xasm = "rex.W jmp %A0";
22445 output_asm_insn (xasm, &call_op);
22449 /* SEH unwinding can require an extra nop to be emitted in several
22450 circumstances. Determine if we have one of those. */
22455 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
22457 /* If we get to another real insn, we don't need the nop. */
22461 /* If we get to the epilogue note, prevent a catch region from
22462 being adjacent to the standard epilogue sequence. If non-call
22463 exceptions are enabled, we'll have done this during epilogue emission. */
22464 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
22465 && !flag_non_call_exceptions
22466 && !can_throw_internal (insn))
22473 /* If we didn't find a real insn following the call, prevent the
22474 unwinder from looking into the next function. */
22480 xasm = "call\t%P0";
22482 xasm = "call\t%A0";
22484 output_asm_insn (xasm, &call_op);
22492 /* Clear stack slot assignments remembered from previous functions.
22493 This is called from INIT_EXPANDERS once before RTL is emitted for each function. */
22496 static struct machine_function *
22497 ix86_init_machine_status (void)
22499 struct machine_function *f;
22501 f = ggc_alloc_cleared_machine_function ();
22502 f->use_fast_prologue_epilogue_nregs = -1;
22503 f->tls_descriptor_call_expanded_p = 0;
22504 f->call_abi = ix86_abi;
22509 /* Return a MEM corresponding to a stack slot with mode MODE.
22510 Allocate a new slot if necessary.
22512 The RTL for a function can have several slots available: N is
22513 which slot to use. */
22516 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
22518 struct stack_local_entry *s;
22520 gcc_assert (n < MAX_386_STACK_LOCALS);
22522 /* Virtual slot is valid only before vregs are instantiated. */
22523 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
22525 for (s = ix86_stack_locals; s; s = s->next)
22526 if (s->mode == mode && s->n == n)
22527 return validize_mem (copy_rtx (s->rtl));
22529 s = ggc_alloc_stack_local_entry ();
22532 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
22534 s->next = ix86_stack_locals;
22535 ix86_stack_locals = s;
22536 return validize_mem (s->rtl);
22539 /* Calculate the length of the memory address in the instruction encoding.
22540 Includes addr32 prefix, does not include the one-byte modrm, opcode,
22541 or other prefixes. */
22544 memory_address_length (rtx addr)
22546 struct ix86_address parts;
22547 rtx base, index, disp;
22551 if (GET_CODE (addr) == PRE_DEC
22552 || GET_CODE (addr) == POST_INC
22553 || GET_CODE (addr) == PRE_MODIFY
22554 || GET_CODE (addr) == POST_MODIFY)
22557 ok = ix86_decompose_address (addr, &parts);
22560 if (parts.base && GET_CODE (parts.base) == SUBREG)
22561 parts.base = SUBREG_REG (parts.base);
22562 if (parts.index && GET_CODE (parts.index) == SUBREG)
22563 parts.index = SUBREG_REG (parts.index);
22566 index = parts.index;
22569 /* Add length of addr32 prefix. */
22570 len = (GET_CODE (addr) == ZERO_EXTEND
22571 || GET_CODE (addr) == AND);
22574 /* Rule of thumb: esp as the base always wants an index;
22575 ebp as the base always wants a displacement;
22576 r12 as the base always wants an index;
22577 r13 as the base always wants a displacement. */
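/* For illustration: (%eax) needs neither SIB nor displacement (0 extra
   bytes); (%esp) must be encoded with a SIB byte (+1); (%ebp) has no
   mod 00 form and is encoded with a zero disp8 (+1); 4(%eax) costs a
   disp8 (+1) and 0x12345678(%eax) a disp32 (+4).  */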
22579 /* Register Indirect. */
22580 if (base && !index && !disp)
22582 /* esp (for its index) and ebp (for its displacement) need
22583 the two-byte modrm form. Similarly for r12 and r13 in 64-bit mode. */
22586 && (addr == arg_pointer_rtx
22587 || addr == frame_pointer_rtx
22588 || REGNO (addr) == SP_REG
22589 || REGNO (addr) == BP_REG
22590 || REGNO (addr) == R12_REG
22591 || REGNO (addr) == R13_REG))
22595 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
22596 is not disp32, but disp32(%rip), so for disp32 a
22597 SIB byte is needed, unless print_operand_address
22598 optimizes it into disp32(%rip) or (%rip) is implied by UNSPEC. */
22600 else if (disp && !base && !index)
22607 if (GET_CODE (disp) == CONST)
22608 symbol = XEXP (disp, 0);
22609 if (GET_CODE (symbol) == PLUS
22610 && CONST_INT_P (XEXP (symbol, 1)))
22611 symbol = XEXP (symbol, 0);
22613 if (GET_CODE (symbol) != LABEL_REF
22614 && (GET_CODE (symbol) != SYMBOL_REF
22615 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
22616 && (GET_CODE (symbol) != UNSPEC
22617 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
22618 && XINT (symbol, 1) != UNSPEC_PCREL
22619 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
22626 /* Find the length of the displacement constant. */
22629 if (base && satisfies_constraint_K (disp))
22634 /* ebp always wants a displacement. Similarly r13. */
22635 else if (base && REG_P (base)
22636 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
22639 /* An index requires the two-byte modrm form.... */
22641 /* ...like esp (or r12), which always wants an index. */
22642 || base == arg_pointer_rtx
22643 || base == frame_pointer_rtx
22644 || (base && REG_P (base)
22645 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
22662 /* Compute default value for "length_immediate" attribute. When SHORTFORM
22663 is set, expect that the insn has an 8-bit immediate alternative. */
22665 ix86_attr_length_immediate_default (rtx insn, bool shortform)
22669 extract_insn_cached (insn);
22670 for (i = recog_data.n_operands - 1; i >= 0; --i)
22671 if (CONSTANT_P (recog_data.operand[i]))
22673 enum attr_mode mode = get_attr_mode (insn);
22676 if (shortform && CONST_INT_P (recog_data.operand[i]))
22678 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22685 ival = trunc_int_for_mode (ival, HImode);
22688 ival = trunc_int_for_mode (ival, SImode);
22693 if (IN_RANGE (ival, -128, 127))
22710 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
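/* For example, "addl $100, %eax" can use the sign-extended 8-bit immediate
   form (one immediate byte), while "addl $300, %eax" needs a full 32-bit
   immediate (four bytes), since 300 is outside the [-128, 127] range
   tested above.  */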
22715 fatal_insn ("unknown insn mode", insn);
22720 /* Compute default value for "length_address" attribute. */
22722 ix86_attr_length_address_default (rtx insn)
22726 if (get_attr_type (insn) == TYPE_LEA)
22728 rtx set = PATTERN (insn), addr;
22730 if (GET_CODE (set) == PARALLEL)
22731 set = XVECEXP (set, 0, 0);
22733 gcc_assert (GET_CODE (set) == SET);
22735 addr = SET_SRC (set);
22736 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22738 if (GET_CODE (addr) == ZERO_EXTEND)
22739 addr = XEXP (addr, 0);
22740 if (GET_CODE (addr) == SUBREG)
22741 addr = SUBREG_REG (addr);
22744 return memory_address_length (addr);
22747 extract_insn_cached (insn);
22748 for (i = recog_data.n_operands - 1; i >= 0; --i)
22749 if (MEM_P (recog_data.operand[i]))
22751 constrain_operands_cached (reload_completed);
22752 if (which_alternative != -1)
22754 const char *constraints = recog_data.constraints[i];
22755 int alt = which_alternative;
22757 while (*constraints == '=' || *constraints == '+')
22760 while (*constraints++ != ',')
22762 /* Skip ignored operands. */
22763 if (*constraints == 'X')
22766 return memory_address_length (XEXP (recog_data.operand[i], 0));
22771 /* Compute default value for "length_vex" attribute. It includes
22772 the 2- or 3-byte VEX prefix and the 1-byte opcode. */
22775 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
22779 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W
22780 bit requires the 3-byte VEX prefix. */
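/* For illustration: a 0f-map insn using only low registers, such as
   "vaddps %xmm1, %xmm2, %xmm0", fits the 2-byte (0xc5) VEX prefix, while
   anything needing VEX.W, a 0f38/0f3a opcode map, or the REX.X/REX.B bits
   (e.g. %r8-%r15 as the base or index of a memory operand) must use the
   3-byte (0xc4) form, costing one extra byte.  */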
22781 if (!has_0f_opcode || has_vex_w)
22784 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
22788 extract_insn_cached (insn);
22790 for (i = recog_data.n_operands - 1; i >= 0; --i)
22791 if (REG_P (recog_data.operand[i]))
22793 /* REX.W bit requires the 3-byte VEX prefix. */
22794 if (GET_MODE (recog_data.operand[i]) == DImode
22795 && GENERAL_REG_P (recog_data.operand[i]))
22800 /* REX.X or REX.B bits require the 3-byte VEX prefix. */
22801 if (MEM_P (recog_data.operand[i])
22802 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
22809 /* Return the maximum number of instructions a CPU can issue. */
22812 ix86_issue_rate (void)
22816 case PROCESSOR_PENTIUM:
22817 case PROCESSOR_ATOM:
22821 case PROCESSOR_PENTIUMPRO:
22822 case PROCESSOR_PENTIUM4:
22823 case PROCESSOR_CORE2_32:
22824 case PROCESSOR_CORE2_64:
22825 case PROCESSOR_COREI7_32:
22826 case PROCESSOR_COREI7_64:
22827 case PROCESSOR_ATHLON:
22829 case PROCESSOR_AMDFAM10:
22830 case PROCESSOR_NOCONA:
22831 case PROCESSOR_GENERIC32:
22832 case PROCESSOR_GENERIC64:
22833 case PROCESSOR_BDVER1:
22834 case PROCESSOR_BDVER2:
22835 case PROCESSOR_BTVER1:
22843 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
22844 by DEP_INSN and nothing else set by DEP_INSN. */
22847 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22851 /* Simplify the test for uninteresting insns. */
22852 if (insn_type != TYPE_SETCC
22853 && insn_type != TYPE_ICMOV
22854 && insn_type != TYPE_FCMOV
22855 && insn_type != TYPE_IBR)
22858 if ((set = single_set (dep_insn)) != 0)
22860 set = SET_DEST (set);
22863 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22864 && XVECLEN (PATTERN (dep_insn), 0) == 2
22865 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22866 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22868 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22869 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22874 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22877 /* This test is true if the dependent insn reads the flags but
22878 not any other potentially set register. */
22879 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22882 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22888 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN. */
22892 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22895 extract_insn_cached (use_insn);
22896 for (i = recog_data.n_operands - 1; i >= 0; --i)
22897 if (MEM_P (recog_data.operand[i]))
22899 rtx addr = XEXP (recog_data.operand[i], 0);
22900 return modified_in_p (addr, set_insn) != 0;
22906 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22908 enum attr_type insn_type, dep_insn_type;
22909 enum attr_memory memory;
22911 int dep_insn_code_number;
22913 /* Anti and output dependencies have zero cost on all CPUs. */
22914 if (REG_NOTE_KIND (link) != 0)
22917 dep_insn_code_number = recog_memoized (dep_insn);
22919 /* If we can't recognize the insns, we can't really do anything. */
22920 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22923 insn_type = get_attr_type (insn);
22924 dep_insn_type = get_attr_type (dep_insn);
22928 case PROCESSOR_PENTIUM:
22929 /* Address Generation Interlock adds a cycle of latency. */
22930 if (insn_type == TYPE_LEA)
22932 rtx addr = PATTERN (insn);
22934 if (GET_CODE (addr) == PARALLEL)
22935 addr = XVECEXP (addr, 0, 0);
22937 gcc_assert (GET_CODE (addr) == SET);
22939 addr = SET_SRC (addr);
22940 if (modified_in_p (addr, dep_insn))
22943 else if (ix86_agi_dependent (dep_insn, insn))
22946 /* ??? Compares pair with jump/setcc. */
22947 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22950 /* Floating point stores require the value to be ready one cycle earlier. */
22951 if (insn_type == TYPE_FMOV
22952 && get_attr_memory (insn) == MEMORY_STORE
22953 && !ix86_agi_dependent (dep_insn, insn))
22957 case PROCESSOR_PENTIUMPRO:
22958 memory = get_attr_memory (insn);
22960 /* INT->FP conversion is expensive. */
22961 if (get_attr_fp_int_src (dep_insn))
22964 /* There is one extra cycle of latency between an FP op and a store. */
22965 if (insn_type == TYPE_FMOV
22966 && (set = single_set (dep_insn)) != NULL_RTX
22967 && (set2 = single_set (insn)) != NULL_RTX
22968 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22969 && MEM_P (SET_DEST (set2)))
22972 /* Account for the ability of the reorder buffer to hide the latency of a
22973 load by executing it in parallel with the previous instruction when that
22974 instruction is not needed to compute the address. */
22975 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22976 && !ix86_agi_dependent (dep_insn, insn))
22978 /* Claim moves to take one cycle, as the core can issue one load
22979 at a time and the next load can start a cycle later. */
22980 if (dep_insn_type == TYPE_IMOV
22981 || dep_insn_type == TYPE_FMOV)
22989 memory = get_attr_memory (insn);
22991 /* The esp dependency is resolved before the instruction is really finished. */
22993 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22994 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22997 /* INT->FP conversion is expensive. */
22998 if (get_attr_fp_int_src (dep_insn))
23001 /* Account for the ability of the reorder buffer to hide the latency of a
23002 load by executing it in parallel with the previous instruction when that
23003 instruction is not needed to compute the address. */
23004 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23005 && !ix86_agi_dependent (dep_insn, insn))
23007 /* Claim moves to take one cycle, as the core can issue one load
23008 at a time and the next load can start a cycle later. */
23009 if (dep_insn_type == TYPE_IMOV
23010 || dep_insn_type == TYPE_FMOV)
23019 case PROCESSOR_ATHLON:
23021 case PROCESSOR_AMDFAM10:
23022 case PROCESSOR_BDVER1:
23023 case PROCESSOR_BDVER2:
23024 case PROCESSOR_BTVER1:
23025 case PROCESSOR_ATOM:
23026 case PROCESSOR_GENERIC32:
23027 case PROCESSOR_GENERIC64:
23028 memory = get_attr_memory (insn);
23030 /* Account for the ability of the reorder buffer to hide the latency of a
23031 load by executing it in parallel with the previous instruction when that
23032 instruction is not needed to compute the address. */
23033 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23034 && !ix86_agi_dependent (dep_insn, insn))
23036 enum attr_unit unit = get_attr_unit (insn);
23039 /* Because of the difference between the length of integer and
23040 floating unit pipeline preparation stages, the memory operands
23041 for floating point are cheaper.
23043 ??? For Athlon the difference is most probably 2. */
23044 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
23047 loadcost = TARGET_ATHLON ? 2 : 0;
23049 if (cost >= loadcost)
23062 /* How many alternative schedules to try. This should be as wide as the
23063 scheduling freedom in the DFA, but no wider. Making this value too
23064 large results in extra work for the scheduler. */
23067 ia32_multipass_dfa_lookahead (void)
23071 case PROCESSOR_PENTIUM:
23074 case PROCESSOR_PENTIUMPRO:
23078 case PROCESSOR_CORE2_32:
23079 case PROCESSOR_CORE2_64:
23080 case PROCESSOR_COREI7_32:
23081 case PROCESSOR_COREI7_64:
23082 /* Generally, we want haifa-sched:max_issue() to look ahead as far
23083 as the number of instructions that can be executed in a cycle, i.e.,
23084 issue_rate. I wonder why tuning for many CPUs does not do this. */
23085 return ix86_issue_rate ();
23094 /* Model decoder of Core 2/i7.
23095 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
23096 track the instruction fetch block boundaries and make sure that long
23097 (9+ bytes) instructions are assigned to D0. */
23099 /* Maximum length of an insn that can be handled by
23100 a secondary decoder unit. '8' for Core 2/i7. */
23101 static int core2i7_secondary_decoder_max_insn_size;
23103 /* Ifetch block size, i.e., the number of bytes the decoder reads per cycle.
23104 '16' for Core 2/i7. */
23105 static int core2i7_ifetch_block_size;
23107 /* Maximum number of instructions the decoder can handle per cycle.
23108 '6' for Core 2/i7. */
23109 static int core2i7_ifetch_block_max_insns;
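/* Worked example of the model (illustrative numbers): if insns totalling
   12 bytes have already been assigned to the current 16-byte ifetch block,
   a 5-byte insn no longer fits and is filtered out of ready_try until the
   cycle advances; likewise a 7th insn is filtered out even if bytes remain.  */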
23111 typedef struct ix86_first_cycle_multipass_data_ *
23112 ix86_first_cycle_multipass_data_t;
23113 typedef const struct ix86_first_cycle_multipass_data_ *
23114 const_ix86_first_cycle_multipass_data_t;
23116 /* A variable to store target state across calls to max_issue within one cycle. */
23118 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
23119 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
23121 /* Initialize DATA. */
23123 core2i7_first_cycle_multipass_init (void *_data)
23125 ix86_first_cycle_multipass_data_t data
23126 = (ix86_first_cycle_multipass_data_t) _data;
23128 data->ifetch_block_len = 0;
23129 data->ifetch_block_n_insns = 0;
23130 data->ready_try_change = NULL;
23131 data->ready_try_change_size = 0;
23134 /* Advancing the cycle; reset ifetch block counts. */
23136 core2i7_dfa_post_advance_cycle (void)
23138 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
23140 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
23142 data->ifetch_block_len = 0;
23143 data->ifetch_block_n_insns = 0;
23146 static int min_insn_size (rtx);
23148 /* Filter out insns from ready_try that the core will not be able to issue
23149 on the current cycle due to decoder restrictions. */
23151 core2i7_first_cycle_multipass_filter_ready_try
23152 (const_ix86_first_cycle_multipass_data_t data,
23153 char *ready_try, int n_ready, bool first_cycle_insn_p)
23160 if (ready_try[n_ready])
23163 insn = get_ready_element (n_ready);
23164 insn_size = min_insn_size (insn);
23166 if (/* If this is too long an insn for a secondary decoder ... */
23167 (!first_cycle_insn_p
23168 && insn_size > core2i7_secondary_decoder_max_insn_size)
23169 /* ... or it would not fit into the ifetch block ... */
23170 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
23171 /* ... or the decoder is full already ... */
23172 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
23173 /* ... mask the insn out. */
23175 ready_try[n_ready] = 1;
23177 if (data->ready_try_change)
23178 SET_BIT (data->ready_try_change, n_ready);
23183 /* Prepare for a new round of multipass lookahead scheduling. */
23185 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
23186 bool first_cycle_insn_p)
23188 ix86_first_cycle_multipass_data_t data
23189 = (ix86_first_cycle_multipass_data_t) _data;
23190 const_ix86_first_cycle_multipass_data_t prev_data
23191 = ix86_first_cycle_multipass_data;
23193 /* Restore the state from the end of the previous round. */
23194 data->ifetch_block_len = prev_data->ifetch_block_len;
23195 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
23197 /* Filter instructions that cannot be issued on the current cycle due to
23198 decoder restrictions. */
23199 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
23200 first_cycle_insn_p);
23203 /* INSN is being issued in the current solution. Account for its impact on
23204 the decoder model. */
23206 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
23207 rtx insn, const void *_prev_data)
23209 ix86_first_cycle_multipass_data_t data
23210 = (ix86_first_cycle_multipass_data_t) _data;
23211 const_ix86_first_cycle_multipass_data_t prev_data
23212 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
23214 int insn_size = min_insn_size (insn);
23216 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
23217 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
23218 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
23219 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
23221 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
23222 if (!data->ready_try_change)
23224 data->ready_try_change = sbitmap_alloc (n_ready);
23225 data->ready_try_change_size = n_ready;
23227 else if (data->ready_try_change_size < n_ready)
23229 data->ready_try_change = sbitmap_resize (data->ready_try_change,
23231 data->ready_try_change_size = n_ready;
23233 sbitmap_zero (data->ready_try_change);
23235 /* Filter out insns from ready_try that the core will not be able to issue
23236 on the current cycle due to decoder restrictions. */
23237 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
23241 /* Revert the effect on ready_try. */
23243 core2i7_first_cycle_multipass_backtrack (const void *_data,
23245 int n_ready ATTRIBUTE_UNUSED)
23247 const_ix86_first_cycle_multipass_data_t data
23248 = (const_ix86_first_cycle_multipass_data_t) _data;
23249 unsigned int i = 0;
23250 sbitmap_iterator sbi;
23252 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
23253 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
23259 /* Save the result of multipass lookahead scheduling for the next round. */
23261 core2i7_first_cycle_multipass_end (const void *_data)
23263 const_ix86_first_cycle_multipass_data_t data
23264 = (const_ix86_first_cycle_multipass_data_t) _data;
23265 ix86_first_cycle_multipass_data_t next_data
23266 = ix86_first_cycle_multipass_data;
23270 next_data->ifetch_block_len = data->ifetch_block_len;
23271 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
23275 /* Deallocate target data. */
23277 core2i7_first_cycle_multipass_fini (void *_data)
23279 ix86_first_cycle_multipass_data_t data
23280 = (ix86_first_cycle_multipass_data_t) _data;
23282 if (data->ready_try_change)
23284 sbitmap_free (data->ready_try_change);
23285 data->ready_try_change = NULL;
23286 data->ready_try_change_size = 0;
23290 /* Prepare for scheduling pass. */
23292 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
23293 int verbose ATTRIBUTE_UNUSED,
23294 int max_uid ATTRIBUTE_UNUSED)
23296 /* Install scheduling hooks for the current CPU. Some of these hooks are used
23297 in time-critical parts of the scheduler, so we only set them up when
23298 they are actually used. */
23301 case PROCESSOR_CORE2_32:
23302 case PROCESSOR_CORE2_64:
23303 case PROCESSOR_COREI7_32:
23304 case PROCESSOR_COREI7_64:
23305 targetm.sched.dfa_post_advance_cycle
23306 = core2i7_dfa_post_advance_cycle;
23307 targetm.sched.first_cycle_multipass_init
23308 = core2i7_first_cycle_multipass_init;
23309 targetm.sched.first_cycle_multipass_begin
23310 = core2i7_first_cycle_multipass_begin;
23311 targetm.sched.first_cycle_multipass_issue
23312 = core2i7_first_cycle_multipass_issue;
23313 targetm.sched.first_cycle_multipass_backtrack
23314 = core2i7_first_cycle_multipass_backtrack;
23315 targetm.sched.first_cycle_multipass_end
23316 = core2i7_first_cycle_multipass_end;
23317 targetm.sched.first_cycle_multipass_fini
23318 = core2i7_first_cycle_multipass_fini;
23320 /* Set decoder parameters. */
23321 core2i7_secondary_decoder_max_insn_size = 8;
23322 core2i7_ifetch_block_size = 16;
23323 core2i7_ifetch_block_max_insns = 6;
23327 targetm.sched.dfa_post_advance_cycle = NULL;
23328 targetm.sched.first_cycle_multipass_init = NULL;
23329 targetm.sched.first_cycle_multipass_begin = NULL;
23330 targetm.sched.first_cycle_multipass_issue = NULL;
23331 targetm.sched.first_cycle_multipass_backtrack = NULL;
23332 targetm.sched.first_cycle_multipass_end = NULL;
23333 targetm.sched.first_cycle_multipass_fini = NULL;
23339 /* Compute the alignment given to a constant that is being placed in memory.
23340 EXP is the constant and ALIGN is the alignment that the object would ordinarily have.
23342 The value of this function is used instead of that alignment to align the object. */
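/* For example, a DFmode constant (a C "double") that would ordinarily get
   32-bit alignment on IA-32 is given 64-bit alignment by this function, so
   an 8-byte load of it never straddles a cache line.  */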
23346 ix86_constant_alignment (tree exp, int align)
23348 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
23349 || TREE_CODE (exp) == INTEGER_CST)
23351 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
23353 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
23356 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
23357 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
23358 return BITS_PER_WORD;
23363 /* Compute the alignment for a static variable.
23364 TYPE is the data type, and ALIGN is the alignment that
23365 the object would ordinarily have. The value of this function is used
23366 instead of that alignment to align the object. */
23369 ix86_data_alignment (tree type, int align)
23371 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
23373 if (AGGREGATE_TYPE_P (type)
23374 && TYPE_SIZE (type)
23375 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23376 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
23377 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
23378 && align < max_align)
23381 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
23382 to a 16-byte boundary. */
23385 if (AGGREGATE_TYPE_P (type)
23386 && TYPE_SIZE (type)
23387 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23388 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
23389 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23393 if (TREE_CODE (type) == ARRAY_TYPE)
23395 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23397 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23400 else if (TREE_CODE (type) == COMPLEX_TYPE)
23403 if (TYPE_MODE (type) == DCmode && align < 64)
23405 if ((TYPE_MODE (type) == XCmode
23406 || TYPE_MODE (type) == TCmode) && align < 128)
23409 else if ((TREE_CODE (type) == RECORD_TYPE
23410 || TREE_CODE (type) == UNION_TYPE
23411 || TREE_CODE (type) == QUAL_UNION_TYPE)
23412 && TYPE_FIELDS (type))
23414 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23416 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23419 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23420 || TREE_CODE (type) == INTEGER_TYPE)
23422 if (TYPE_MODE (type) == DFmode && align < 64)
23424 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23431 /* Compute the alignment for a local variable or a stack slot. EXP is
23432 the data type or decl itself, MODE is the widest mode available and
23433 ALIGN is the alignment that the object would ordinarily have. The
23434 value of this macro is used instead of that alignment to align the object. */
23438 ix86_local_alignment (tree exp, enum machine_mode mode,
23439 unsigned int align)
23443 if (exp && DECL_P (exp))
23445 type = TREE_TYPE (exp);
23454 /* Don't do dynamic stack realignment for long long objects with
23455 -mpreferred-stack-boundary=2. */
23458 && ix86_preferred_stack_boundary < 64
23459 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
23460 && (!type || !TYPE_USER_ALIGN (type))
23461 && (!decl || !DECL_USER_ALIGN (decl)))
23464 /* If TYPE is NULL, we are allocating a stack slot for caller-save
23465 register in MODE. We will return the largest alignment of XF and DF. */
23469 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
23470 align = GET_MODE_ALIGNMENT (DFmode);
23474 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
23475 to a 16-byte boundary. The exact wording is:

23477 An array uses the same alignment as its elements, except that a local or
23478 global array variable of length at least 16 bytes or
23479 a C99 variable-length array variable always has alignment of at least 16 bytes.

23481 This was added to allow use of aligned SSE instructions on arrays. This
23482 rule is meant for static storage (where the compiler cannot do the analysis
23483 by itself). We follow it for automatic variables only when convenient.
23484 We fully control everything in the function being compiled, and functions
23485 from other units cannot rely on the alignment.

23487 Exclude the va_list type. It is the common case of a local array where
23488 we cannot benefit from the alignment. */
23489 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
23492 if (AGGREGATE_TYPE_P (type)
23493 && (va_list_type_node == NULL_TREE
23494 || (TYPE_MAIN_VARIANT (type)
23495 != TYPE_MAIN_VARIANT (va_list_type_node)))
23496 && TYPE_SIZE (type)
23497 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23498 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
23499 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23502 if (TREE_CODE (type) == ARRAY_TYPE)
23504 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23506 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23509 else if (TREE_CODE (type) == COMPLEX_TYPE)
23511 if (TYPE_MODE (type) == DCmode && align < 64)
23513 if ((TYPE_MODE (type) == XCmode
23514 || TYPE_MODE (type) == TCmode) && align < 128)
23517 else if ((TREE_CODE (type) == RECORD_TYPE
23518 || TREE_CODE (type) == UNION_TYPE
23519 || TREE_CODE (type) == QUAL_UNION_TYPE)
23520 && TYPE_FIELDS (type))
23522 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23524 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23527 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23528 || TREE_CODE (type) == INTEGER_TYPE)
23531 if (TYPE_MODE (type) == DFmode && align < 64)
23533 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23539 /* Compute the minimum required alignment for dynamic stack realignment
23540 purposes for a local variable, parameter or a stack slot. EXP is
23541 the data type or decl itself, MODE is its mode and ALIGN is the
23542 alignment that the object would ordinarily have. */
23545 ix86_minimum_alignment (tree exp, enum machine_mode mode,
23546 unsigned int align)
23550 if (exp && DECL_P (exp))
23552 type = TREE_TYPE (exp);
23561 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
23564 /* Don't do dynamic stack realignment for long long objects with
23565 -mpreferred-stack-boundary=2. */
23566 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
23567 && (!type || !TYPE_USER_ALIGN (type))
23568 && (!decl || !DECL_USER_ALIGN (decl)))
23574 /* Find a location for the static chain incoming to a nested function.
23575 This is a register, unless all free registers are used by arguments. */
23578 ix86_static_chain (const_tree fndecl, bool incoming_p)
23582 if (!DECL_STATIC_CHAIN (fndecl))
23587 /* We always use R10 in 64-bit mode. */
23595 /* By default in 32-bit mode we use ECX to pass the static chain. */
23598 fntype = TREE_TYPE (fndecl);
23599 ccvt = ix86_get_callcvt (fntype);
23600 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
23602 /* Fastcall functions use ecx/edx for arguments, which leaves
23603 us with EAX for the static chain.
23604 Thiscall functions use ecx for arguments, which also
23605 leaves us with EAX for the static chain. */
23608 else if (ix86_function_regparm (fntype, fndecl) == 3)
23610 /* For regparm 3, we have no free call-clobbered registers in
23611 which to store the static chain. In order to implement this,
23612 we have the trampoline push the static chain to the stack.
23613 However, we can't push a value below the return address when
23614 we call the nested function directly, so we have to use an
23615 alternate entry point. For this we use ESI, and have the
23616 alternate entry point push ESI, so that things appear the
23617 same once we're executing the nested function. */
23620 if (fndecl == current_function_decl)
23621 ix86_static_chain_on_stack = true;
23622 return gen_frame_mem (SImode,
23623 plus_constant (arg_pointer_rtx, -8));
23629 return gen_rtx_REG (Pmode, regno);
23632 /* Emit RTL insns to initialize the variable parts of a trampoline.
23633 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23634 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23635 to be passed to the target function. */
23638 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
23644 fnaddr = XEXP (DECL_RTL (fndecl), 0);
23650 /* Load the function address to r11. Try to load the address using
23651 the shorter movl instead of movabs. We may want to support
23652 movq for kernel mode, but the kernel does not use trampolines at the moment. */
23654 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
23656 fnaddr = copy_to_mode_reg (DImode, fnaddr);
23658 mem = adjust_address (m_tramp, HImode, offset);
23659 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23661 mem = adjust_address (m_tramp, SImode, offset + 2);
23662 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
23667 mem = adjust_address (m_tramp, HImode, offset);
23668 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23670 mem = adjust_address (m_tramp, DImode, offset + 2);
23671 emit_move_insn (mem, fnaddr);
23675 /* Load the static chain using movabs to r10. Use the
23676 shorter movl instead of movabs for x32. */
23688 mem = adjust_address (m_tramp, HImode, offset);
23689 emit_move_insn (mem, gen_int_mode (opcode, HImode));
23691 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
23692 emit_move_insn (mem, chain_value);
23695 /* Jump to r11; the last (unused) byte is a nop, only there to
23696 pad the write out to a single 32-bit store. */
23697 mem = adjust_address (m_tramp, SImode, offset);
23698 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
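/* Illustrative byte layout of the 64-bit trampoline emitted above
   (little-endian immediates; the x32 path uses the shorter movl forms,
   and the exact static-chain opcode is computed earlier):
     49 bb <8-byte fnaddr>    movabs $fnaddr, %r11
     49 ba <8-byte chain>     movabs $chain,  %r10
     49 ff e3                 rex.W jmp *%r11
     90                       nop, pads the final 32-bit store  */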
23705 /* Depending on the static chain location, either load a register
23706 with a constant, or push the constant to the stack. All of the
23707 instructions are the same size. */
23708 chain = ix86_static_chain (fndecl, true);
23711 switch (REGNO (chain))
23714 opcode = 0xb8; break;
23716 opcode = 0xb9; break;
23718 gcc_unreachable ();
23724 mem = adjust_address (m_tramp, QImode, offset);
23725 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23727 mem = adjust_address (m_tramp, SImode, offset + 1);
23728 emit_move_insn (mem, chain_value);
23731 mem = adjust_address (m_tramp, QImode, offset);
23732 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23734 mem = adjust_address (m_tramp, SImode, offset + 1);
23736 /* Compute offset from the end of the jmp to the target function.
23737 In the case in which the trampoline stores the static chain on
23738 the stack, we need to skip the first insn which pushes the
23739 (call-saved) register static chain; this push is 1 byte. */
23741 disp = expand_binop (SImode, sub_optab, fnaddr,
23742 plus_constant (XEXP (m_tramp, 0),
23743 offset - (MEM_P (chain) ? 1 : 0)),
23744 NULL_RTX, 1, OPTAB_DIRECT);
23745 emit_move_insn (mem, disp);
23748 gcc_assert (offset <= TRAMPOLINE_SIZE);
23750 #ifdef HAVE_ENABLE_EXECUTE_STACK
23751 #ifdef CHECK_EXECUTE_STACK_ENABLED
23752 if (CHECK_EXECUTE_STACK_ENABLED)
23754 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23755 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
23759 /* The following file contains several enumerations and data structures
23760 built from the definitions in i386-builtin-types.def. */
23762 #include "i386-builtin-types.inc"
23764 /* Table for the ix86 builtin non-function types. */
23765 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23767 /* Retrieve an element from the above table, building some of
23768 the types lazily. */
23771 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23773 unsigned int index;
23776 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23778 type = ix86_builtin_type_tab[(int) tcode];
23782 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23783 if (tcode <= IX86_BT_LAST_VECT)
23785 enum machine_mode mode;
23787 index = tcode - IX86_BT_LAST_PRIM - 1;
23788 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23789 mode = ix86_builtin_type_vect_mode[index];
23791 type = build_vector_type_for_mode (itype, mode);
23797 index = tcode - IX86_BT_LAST_VECT - 1;
23798 if (tcode <= IX86_BT_LAST_PTR)
23799 quals = TYPE_UNQUALIFIED;
23801 quals = TYPE_QUAL_CONST;
23803 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23804 if (quals != TYPE_UNQUALIFIED)
23805 itype = build_qualified_type (itype, quals);
23807 type = build_pointer_type (itype);
23810 ix86_builtin_type_tab[(int) tcode] = type;
23814 /* Table for the ix86 builtin function types. */
23815 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23817 /* Retrieve an element from the above table, building some of
23818 the types lazily. */
23821 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23825 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23827 type = ix86_builtin_func_type_tab[(int) tcode];
23831 if (tcode <= IX86_BT_LAST_FUNC)
23833 unsigned start = ix86_builtin_func_start[(int) tcode];
23834 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23835 tree rtype, atype, args = void_list_node;
23838 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23839 for (i = after - 1; i > start; --i)
23841 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23842 args = tree_cons (NULL, atype, args);
23845 type = build_function_type (rtype, args);
23849 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23850 enum ix86_builtin_func_type icode;
23852 icode = ix86_builtin_func_alias_base[index];
23853 type = ix86_get_builtin_func_type (icode);
23856 ix86_builtin_func_type_tab[(int) tcode] = type;
23861 /* Codes for all the SSE/MMX builtins. */
23864 IX86_BUILTIN_ADDPS,
23865 IX86_BUILTIN_ADDSS,
23866 IX86_BUILTIN_DIVPS,
23867 IX86_BUILTIN_DIVSS,
23868 IX86_BUILTIN_MULPS,
23869 IX86_BUILTIN_MULSS,
23870 IX86_BUILTIN_SUBPS,
23871 IX86_BUILTIN_SUBSS,
23873 IX86_BUILTIN_CMPEQPS,
23874 IX86_BUILTIN_CMPLTPS,
23875 IX86_BUILTIN_CMPLEPS,
23876 IX86_BUILTIN_CMPGTPS,
23877 IX86_BUILTIN_CMPGEPS,
23878 IX86_BUILTIN_CMPNEQPS,
23879 IX86_BUILTIN_CMPNLTPS,
23880 IX86_BUILTIN_CMPNLEPS,
23881 IX86_BUILTIN_CMPNGTPS,
23882 IX86_BUILTIN_CMPNGEPS,
23883 IX86_BUILTIN_CMPORDPS,
23884 IX86_BUILTIN_CMPUNORDPS,
23885 IX86_BUILTIN_CMPEQSS,
23886 IX86_BUILTIN_CMPLTSS,
23887 IX86_BUILTIN_CMPLESS,
23888 IX86_BUILTIN_CMPNEQSS,
23889 IX86_BUILTIN_CMPNLTSS,
23890 IX86_BUILTIN_CMPNLESS,
23891 IX86_BUILTIN_CMPNGTSS,
23892 IX86_BUILTIN_CMPNGESS,
23893 IX86_BUILTIN_CMPORDSS,
23894 IX86_BUILTIN_CMPUNORDSS,
23896 IX86_BUILTIN_COMIEQSS,
23897 IX86_BUILTIN_COMILTSS,
23898 IX86_BUILTIN_COMILESS,
23899 IX86_BUILTIN_COMIGTSS,
23900 IX86_BUILTIN_COMIGESS,
23901 IX86_BUILTIN_COMINEQSS,
23902 IX86_BUILTIN_UCOMIEQSS,
23903 IX86_BUILTIN_UCOMILTSS,
23904 IX86_BUILTIN_UCOMILESS,
23905 IX86_BUILTIN_UCOMIGTSS,
23906 IX86_BUILTIN_UCOMIGESS,
23907 IX86_BUILTIN_UCOMINEQSS,
23909 IX86_BUILTIN_CVTPI2PS,
23910 IX86_BUILTIN_CVTPS2PI,
23911 IX86_BUILTIN_CVTSI2SS,
23912 IX86_BUILTIN_CVTSI642SS,
23913 IX86_BUILTIN_CVTSS2SI,
23914 IX86_BUILTIN_CVTSS2SI64,
23915 IX86_BUILTIN_CVTTPS2PI,
23916 IX86_BUILTIN_CVTTSS2SI,
23917 IX86_BUILTIN_CVTTSS2SI64,
23919 IX86_BUILTIN_MAXPS,
23920 IX86_BUILTIN_MAXSS,
23921 IX86_BUILTIN_MINPS,
23922 IX86_BUILTIN_MINSS,
23924 IX86_BUILTIN_LOADUPS,
23925 IX86_BUILTIN_STOREUPS,
23926 IX86_BUILTIN_MOVSS,
23928 IX86_BUILTIN_MOVHLPS,
23929 IX86_BUILTIN_MOVLHPS,
23930 IX86_BUILTIN_LOADHPS,
23931 IX86_BUILTIN_LOADLPS,
23932 IX86_BUILTIN_STOREHPS,
23933 IX86_BUILTIN_STORELPS,
23935 IX86_BUILTIN_MASKMOVQ,
23936 IX86_BUILTIN_MOVMSKPS,
23937 IX86_BUILTIN_PMOVMSKB,
23939 IX86_BUILTIN_MOVNTPS,
23940 IX86_BUILTIN_MOVNTQ,
23942 IX86_BUILTIN_LOADDQU,
23943 IX86_BUILTIN_STOREDQU,
23945 IX86_BUILTIN_PACKSSWB,
23946 IX86_BUILTIN_PACKSSDW,
23947 IX86_BUILTIN_PACKUSWB,
23949 IX86_BUILTIN_PADDB,
23950 IX86_BUILTIN_PADDW,
23951 IX86_BUILTIN_PADDD,
23952 IX86_BUILTIN_PADDQ,
23953 IX86_BUILTIN_PADDSB,
23954 IX86_BUILTIN_PADDSW,
23955 IX86_BUILTIN_PADDUSB,
23956 IX86_BUILTIN_PADDUSW,
23957 IX86_BUILTIN_PSUBB,
23958 IX86_BUILTIN_PSUBW,
23959 IX86_BUILTIN_PSUBD,
23960 IX86_BUILTIN_PSUBQ,
23961 IX86_BUILTIN_PSUBSB,
23962 IX86_BUILTIN_PSUBSW,
23963 IX86_BUILTIN_PSUBUSB,
23964 IX86_BUILTIN_PSUBUSW,
23967 IX86_BUILTIN_PANDN,
23971 IX86_BUILTIN_PAVGB,
23972 IX86_BUILTIN_PAVGW,
23974 IX86_BUILTIN_PCMPEQB,
23975 IX86_BUILTIN_PCMPEQW,
23976 IX86_BUILTIN_PCMPEQD,
23977 IX86_BUILTIN_PCMPGTB,
23978 IX86_BUILTIN_PCMPGTW,
23979 IX86_BUILTIN_PCMPGTD,
23981 IX86_BUILTIN_PMADDWD,
23983 IX86_BUILTIN_PMAXSW,
23984 IX86_BUILTIN_PMAXUB,
23985 IX86_BUILTIN_PMINSW,
23986 IX86_BUILTIN_PMINUB,
23988 IX86_BUILTIN_PMULHUW,
23989 IX86_BUILTIN_PMULHW,
23990 IX86_BUILTIN_PMULLW,
23992 IX86_BUILTIN_PSADBW,
23993 IX86_BUILTIN_PSHUFW,
23995 IX86_BUILTIN_PSLLW,
23996 IX86_BUILTIN_PSLLD,
23997 IX86_BUILTIN_PSLLQ,
23998 IX86_BUILTIN_PSRAW,
23999 IX86_BUILTIN_PSRAD,
24000 IX86_BUILTIN_PSRLW,
24001 IX86_BUILTIN_PSRLD,
24002 IX86_BUILTIN_PSRLQ,
24003 IX86_BUILTIN_PSLLWI,
24004 IX86_BUILTIN_PSLLDI,
24005 IX86_BUILTIN_PSLLQI,
24006 IX86_BUILTIN_PSRAWI,
24007 IX86_BUILTIN_PSRADI,
24008 IX86_BUILTIN_PSRLWI,
24009 IX86_BUILTIN_PSRLDI,
24010 IX86_BUILTIN_PSRLQI,
24012 IX86_BUILTIN_PUNPCKHBW,
24013 IX86_BUILTIN_PUNPCKHWD,
24014 IX86_BUILTIN_PUNPCKHDQ,
24015 IX86_BUILTIN_PUNPCKLBW,
24016 IX86_BUILTIN_PUNPCKLWD,
24017 IX86_BUILTIN_PUNPCKLDQ,
24019 IX86_BUILTIN_SHUFPS,
24021 IX86_BUILTIN_RCPPS,
24022 IX86_BUILTIN_RCPSS,
24023 IX86_BUILTIN_RSQRTPS,
24024 IX86_BUILTIN_RSQRTPS_NR,
24025 IX86_BUILTIN_RSQRTSS,
24026 IX86_BUILTIN_RSQRTF,
24027 IX86_BUILTIN_SQRTPS,
24028 IX86_BUILTIN_SQRTPS_NR,
24029 IX86_BUILTIN_SQRTSS,
24031 IX86_BUILTIN_UNPCKHPS,
24032 IX86_BUILTIN_UNPCKLPS,
24034 IX86_BUILTIN_ANDPS,
24035 IX86_BUILTIN_ANDNPS,
24037 IX86_BUILTIN_XORPS,
24040 IX86_BUILTIN_LDMXCSR,
24041 IX86_BUILTIN_STMXCSR,
24042 IX86_BUILTIN_SFENCE,
24044 /* 3DNow! Original */
24045 IX86_BUILTIN_FEMMS,
24046 IX86_BUILTIN_PAVGUSB,
24047 IX86_BUILTIN_PF2ID,
24048 IX86_BUILTIN_PFACC,
24049 IX86_BUILTIN_PFADD,
24050 IX86_BUILTIN_PFCMPEQ,
24051 IX86_BUILTIN_PFCMPGE,
24052 IX86_BUILTIN_PFCMPGT,
24053 IX86_BUILTIN_PFMAX,
24054 IX86_BUILTIN_PFMIN,
24055 IX86_BUILTIN_PFMUL,
24056 IX86_BUILTIN_PFRCP,
24057 IX86_BUILTIN_PFRCPIT1,
24058 IX86_BUILTIN_PFRCPIT2,
24059 IX86_BUILTIN_PFRSQIT1,
24060 IX86_BUILTIN_PFRSQRT,
24061 IX86_BUILTIN_PFSUB,
24062 IX86_BUILTIN_PFSUBR,
24063 IX86_BUILTIN_PI2FD,
24064 IX86_BUILTIN_PMULHRW,
24066 /* 3DNow! Athlon Extensions */
24067 IX86_BUILTIN_PF2IW,
24068 IX86_BUILTIN_PFNACC,
24069 IX86_BUILTIN_PFPNACC,
24070 IX86_BUILTIN_PI2FW,
24071 IX86_BUILTIN_PSWAPDSI,
24072 IX86_BUILTIN_PSWAPDSF,
24075 IX86_BUILTIN_ADDPD,
24076 IX86_BUILTIN_ADDSD,
24077 IX86_BUILTIN_DIVPD,
24078 IX86_BUILTIN_DIVSD,
24079 IX86_BUILTIN_MULPD,
24080 IX86_BUILTIN_MULSD,
24081 IX86_BUILTIN_SUBPD,
24082 IX86_BUILTIN_SUBSD,
24084 IX86_BUILTIN_CMPEQPD,
24085 IX86_BUILTIN_CMPLTPD,
24086 IX86_BUILTIN_CMPLEPD,
24087 IX86_BUILTIN_CMPGTPD,
24088 IX86_BUILTIN_CMPGEPD,
24089 IX86_BUILTIN_CMPNEQPD,
24090 IX86_BUILTIN_CMPNLTPD,
24091 IX86_BUILTIN_CMPNLEPD,
24092 IX86_BUILTIN_CMPNGTPD,
24093 IX86_BUILTIN_CMPNGEPD,
24094 IX86_BUILTIN_CMPORDPD,
24095 IX86_BUILTIN_CMPUNORDPD,
24096 IX86_BUILTIN_CMPEQSD,
24097 IX86_BUILTIN_CMPLTSD,
24098 IX86_BUILTIN_CMPLESD,
24099 IX86_BUILTIN_CMPNEQSD,
24100 IX86_BUILTIN_CMPNLTSD,
24101 IX86_BUILTIN_CMPNLESD,
24102 IX86_BUILTIN_CMPORDSD,
24103 IX86_BUILTIN_CMPUNORDSD,
24105 IX86_BUILTIN_COMIEQSD,
24106 IX86_BUILTIN_COMILTSD,
24107 IX86_BUILTIN_COMILESD,
24108 IX86_BUILTIN_COMIGTSD,
24109 IX86_BUILTIN_COMIGESD,
24110 IX86_BUILTIN_COMINEQSD,
24111 IX86_BUILTIN_UCOMIEQSD,
24112 IX86_BUILTIN_UCOMILTSD,
24113 IX86_BUILTIN_UCOMILESD,
24114 IX86_BUILTIN_UCOMIGTSD,
24115 IX86_BUILTIN_UCOMIGESD,
24116 IX86_BUILTIN_UCOMINEQSD,
24118 IX86_BUILTIN_MAXPD,
24119 IX86_BUILTIN_MAXSD,
24120 IX86_BUILTIN_MINPD,
24121 IX86_BUILTIN_MINSD,
24123 IX86_BUILTIN_ANDPD,
24124 IX86_BUILTIN_ANDNPD,
24126 IX86_BUILTIN_XORPD,
24128 IX86_BUILTIN_SQRTPD,
24129 IX86_BUILTIN_SQRTSD,
24131 IX86_BUILTIN_UNPCKHPD,
24132 IX86_BUILTIN_UNPCKLPD,
24134 IX86_BUILTIN_SHUFPD,
24136 IX86_BUILTIN_LOADUPD,
24137 IX86_BUILTIN_STOREUPD,
24138 IX86_BUILTIN_MOVSD,
24140 IX86_BUILTIN_LOADHPD,
24141 IX86_BUILTIN_LOADLPD,
24143 IX86_BUILTIN_CVTDQ2PD,
24144 IX86_BUILTIN_CVTDQ2PS,
24146 IX86_BUILTIN_CVTPD2DQ,
24147 IX86_BUILTIN_CVTPD2PI,
24148 IX86_BUILTIN_CVTPD2PS,
24149 IX86_BUILTIN_CVTTPD2DQ,
24150 IX86_BUILTIN_CVTTPD2PI,
24152 IX86_BUILTIN_CVTPI2PD,
24153 IX86_BUILTIN_CVTSI2SD,
24154 IX86_BUILTIN_CVTSI642SD,
24156 IX86_BUILTIN_CVTSD2SI,
24157 IX86_BUILTIN_CVTSD2SI64,
24158 IX86_BUILTIN_CVTSD2SS,
24159 IX86_BUILTIN_CVTSS2SD,
24160 IX86_BUILTIN_CVTTSD2SI,
24161 IX86_BUILTIN_CVTTSD2SI64,
24163 IX86_BUILTIN_CVTPS2DQ,
24164 IX86_BUILTIN_CVTPS2PD,
24165 IX86_BUILTIN_CVTTPS2DQ,
24167 IX86_BUILTIN_MOVNTI,
24168 IX86_BUILTIN_MOVNTPD,
24169 IX86_BUILTIN_MOVNTDQ,
24171 IX86_BUILTIN_MOVQ128,
24174 IX86_BUILTIN_MASKMOVDQU,
24175 IX86_BUILTIN_MOVMSKPD,
24176 IX86_BUILTIN_PMOVMSKB128,
24178 IX86_BUILTIN_PACKSSWB128,
24179 IX86_BUILTIN_PACKSSDW128,
24180 IX86_BUILTIN_PACKUSWB128,
24182 IX86_BUILTIN_PADDB128,
24183 IX86_BUILTIN_PADDW128,
24184 IX86_BUILTIN_PADDD128,
24185 IX86_BUILTIN_PADDQ128,
24186 IX86_BUILTIN_PADDSB128,
24187 IX86_BUILTIN_PADDSW128,
24188 IX86_BUILTIN_PADDUSB128,
24189 IX86_BUILTIN_PADDUSW128,
24190 IX86_BUILTIN_PSUBB128,
24191 IX86_BUILTIN_PSUBW128,
24192 IX86_BUILTIN_PSUBD128,
24193 IX86_BUILTIN_PSUBQ128,
24194 IX86_BUILTIN_PSUBSB128,
24195 IX86_BUILTIN_PSUBSW128,
24196 IX86_BUILTIN_PSUBUSB128,
24197 IX86_BUILTIN_PSUBUSW128,
24199 IX86_BUILTIN_PAND128,
24200 IX86_BUILTIN_PANDN128,
24201 IX86_BUILTIN_POR128,
24202 IX86_BUILTIN_PXOR128,
24204 IX86_BUILTIN_PAVGB128,
24205 IX86_BUILTIN_PAVGW128,
24207 IX86_BUILTIN_PCMPEQB128,
24208 IX86_BUILTIN_PCMPEQW128,
24209 IX86_BUILTIN_PCMPEQD128,
24210 IX86_BUILTIN_PCMPGTB128,
24211 IX86_BUILTIN_PCMPGTW128,
24212 IX86_BUILTIN_PCMPGTD128,
24214 IX86_BUILTIN_PMADDWD128,
24216 IX86_BUILTIN_PMAXSW128,
24217 IX86_BUILTIN_PMAXUB128,
24218 IX86_BUILTIN_PMINSW128,
24219 IX86_BUILTIN_PMINUB128,
24221 IX86_BUILTIN_PMULUDQ,
24222 IX86_BUILTIN_PMULUDQ128,
24223 IX86_BUILTIN_PMULHUW128,
24224 IX86_BUILTIN_PMULHW128,
24225 IX86_BUILTIN_PMULLW128,
24227 IX86_BUILTIN_PSADBW128,
24228 IX86_BUILTIN_PSHUFHW,
24229 IX86_BUILTIN_PSHUFLW,
24230 IX86_BUILTIN_PSHUFD,
24232 IX86_BUILTIN_PSLLDQI128,
24233 IX86_BUILTIN_PSLLWI128,
24234 IX86_BUILTIN_PSLLDI128,
24235 IX86_BUILTIN_PSLLQI128,
24236 IX86_BUILTIN_PSRAWI128,
24237 IX86_BUILTIN_PSRADI128,
24238 IX86_BUILTIN_PSRLDQI128,
24239 IX86_BUILTIN_PSRLWI128,
24240 IX86_BUILTIN_PSRLDI128,
24241 IX86_BUILTIN_PSRLQI128,
24243 IX86_BUILTIN_PSLLDQ128,
24244 IX86_BUILTIN_PSLLW128,
24245 IX86_BUILTIN_PSLLD128,
24246 IX86_BUILTIN_PSLLQ128,
24247 IX86_BUILTIN_PSRAW128,
24248 IX86_BUILTIN_PSRAD128,
24249 IX86_BUILTIN_PSRLW128,
24250 IX86_BUILTIN_PSRLD128,
24251 IX86_BUILTIN_PSRLQ128,
24253 IX86_BUILTIN_PUNPCKHBW128,
24254 IX86_BUILTIN_PUNPCKHWD128,
24255 IX86_BUILTIN_PUNPCKHDQ128,
24256 IX86_BUILTIN_PUNPCKHQDQ128,
24257 IX86_BUILTIN_PUNPCKLBW128,
24258 IX86_BUILTIN_PUNPCKLWD128,
24259 IX86_BUILTIN_PUNPCKLDQ128,
24260 IX86_BUILTIN_PUNPCKLQDQ128,
24262 IX86_BUILTIN_CLFLUSH,
24263 IX86_BUILTIN_MFENCE,
24264 IX86_BUILTIN_LFENCE,
24265 IX86_BUILTIN_PAUSE,
24267 IX86_BUILTIN_BSRSI,
24268 IX86_BUILTIN_BSRDI,
24269 IX86_BUILTIN_RDPMC,
24270 IX86_BUILTIN_RDTSC,
24271 IX86_BUILTIN_RDTSCP,
24272 IX86_BUILTIN_ROLQI,
24273 IX86_BUILTIN_ROLHI,
24274 IX86_BUILTIN_RORQI,
24275 IX86_BUILTIN_RORHI,
24278 IX86_BUILTIN_ADDSUBPS,
24279 IX86_BUILTIN_HADDPS,
24280 IX86_BUILTIN_HSUBPS,
24281 IX86_BUILTIN_MOVSHDUP,
24282 IX86_BUILTIN_MOVSLDUP,
24283 IX86_BUILTIN_ADDSUBPD,
24284 IX86_BUILTIN_HADDPD,
24285 IX86_BUILTIN_HSUBPD,
24286 IX86_BUILTIN_LDDQU,
24288 IX86_BUILTIN_MONITOR,
24289 IX86_BUILTIN_MWAIT,
24292 IX86_BUILTIN_PHADDW,
24293 IX86_BUILTIN_PHADDD,
24294 IX86_BUILTIN_PHADDSW,
24295 IX86_BUILTIN_PHSUBW,
24296 IX86_BUILTIN_PHSUBD,
24297 IX86_BUILTIN_PHSUBSW,
24298 IX86_BUILTIN_PMADDUBSW,
24299 IX86_BUILTIN_PMULHRSW,
24300 IX86_BUILTIN_PSHUFB,
24301 IX86_BUILTIN_PSIGNB,
24302 IX86_BUILTIN_PSIGNW,
24303 IX86_BUILTIN_PSIGND,
24304 IX86_BUILTIN_PALIGNR,
24305 IX86_BUILTIN_PABSB,
24306 IX86_BUILTIN_PABSW,
24307 IX86_BUILTIN_PABSD,
24309 IX86_BUILTIN_PHADDW128,
24310 IX86_BUILTIN_PHADDD128,
24311 IX86_BUILTIN_PHADDSW128,
24312 IX86_BUILTIN_PHSUBW128,
24313 IX86_BUILTIN_PHSUBD128,
24314 IX86_BUILTIN_PHSUBSW128,
24315 IX86_BUILTIN_PMADDUBSW128,
24316 IX86_BUILTIN_PMULHRSW128,
24317 IX86_BUILTIN_PSHUFB128,
24318 IX86_BUILTIN_PSIGNB128,
24319 IX86_BUILTIN_PSIGNW128,
24320 IX86_BUILTIN_PSIGND128,
24321 IX86_BUILTIN_PALIGNR128,
24322 IX86_BUILTIN_PABSB128,
24323 IX86_BUILTIN_PABSW128,
24324 IX86_BUILTIN_PABSD128,
24326 /* AMDFAM10 - SSE4A New Instructions. */
24327 IX86_BUILTIN_MOVNTSD,
24328 IX86_BUILTIN_MOVNTSS,
24329 IX86_BUILTIN_EXTRQI,
24330 IX86_BUILTIN_EXTRQ,
24331 IX86_BUILTIN_INSERTQI,
24332 IX86_BUILTIN_INSERTQ,
24335 IX86_BUILTIN_BLENDPD,
24336 IX86_BUILTIN_BLENDPS,
24337 IX86_BUILTIN_BLENDVPD,
24338 IX86_BUILTIN_BLENDVPS,
24339 IX86_BUILTIN_PBLENDVB128,
24340 IX86_BUILTIN_PBLENDW128,
24342 IX86_BUILTIN_DPPD,
24343 IX86_BUILTIN_DPPS,
24345 IX86_BUILTIN_INSERTPS128,
24347 IX86_BUILTIN_MOVNTDQA,
24348 IX86_BUILTIN_MPSADBW128,
24349 IX86_BUILTIN_PACKUSDW128,
24350 IX86_BUILTIN_PCMPEQQ,
24351 IX86_BUILTIN_PHMINPOSUW128,
24353 IX86_BUILTIN_PMAXSB128,
24354 IX86_BUILTIN_PMAXSD128,
24355 IX86_BUILTIN_PMAXUD128,
24356 IX86_BUILTIN_PMAXUW128,
24358 IX86_BUILTIN_PMINSB128,
24359 IX86_BUILTIN_PMINSD128,
24360 IX86_BUILTIN_PMINUD128,
24361 IX86_BUILTIN_PMINUW128,
24363 IX86_BUILTIN_PMOVSXBW128,
24364 IX86_BUILTIN_PMOVSXBD128,
24365 IX86_BUILTIN_PMOVSXBQ128,
24366 IX86_BUILTIN_PMOVSXWD128,
24367 IX86_BUILTIN_PMOVSXWQ128,
24368 IX86_BUILTIN_PMOVSXDQ128,
24370 IX86_BUILTIN_PMOVZXBW128,
24371 IX86_BUILTIN_PMOVZXBD128,
24372 IX86_BUILTIN_PMOVZXBQ128,
24373 IX86_BUILTIN_PMOVZXWD128,
24374 IX86_BUILTIN_PMOVZXWQ128,
24375 IX86_BUILTIN_PMOVZXDQ128,
24377 IX86_BUILTIN_PMULDQ128,
24378 IX86_BUILTIN_PMULLD128,
24380 IX86_BUILTIN_ROUNDPD,
24381 IX86_BUILTIN_ROUNDPS,
24382 IX86_BUILTIN_ROUNDSD,
24383 IX86_BUILTIN_ROUNDSS,
24385 IX86_BUILTIN_FLOORPD,
24386 IX86_BUILTIN_CEILPD,
24387 IX86_BUILTIN_TRUNCPD,
24388 IX86_BUILTIN_RINTPD,
24389 IX86_BUILTIN_ROUNDPD_AZ,
24390 IX86_BUILTIN_FLOORPS,
24391 IX86_BUILTIN_CEILPS,
24392 IX86_BUILTIN_TRUNCPS,
24393 IX86_BUILTIN_RINTPS,
24394 IX86_BUILTIN_ROUNDPS_AZ,
24396 IX86_BUILTIN_PTESTZ,
24397 IX86_BUILTIN_PTESTC,
24398 IX86_BUILTIN_PTESTNZC,
24400 IX86_BUILTIN_VEC_INIT_V2SI,
24401 IX86_BUILTIN_VEC_INIT_V4HI,
24402 IX86_BUILTIN_VEC_INIT_V8QI,
24403 IX86_BUILTIN_VEC_EXT_V2DF,
24404 IX86_BUILTIN_VEC_EXT_V2DI,
24405 IX86_BUILTIN_VEC_EXT_V4SF,
24406 IX86_BUILTIN_VEC_EXT_V4SI,
24407 IX86_BUILTIN_VEC_EXT_V8HI,
24408 IX86_BUILTIN_VEC_EXT_V2SI,
24409 IX86_BUILTIN_VEC_EXT_V4HI,
24410 IX86_BUILTIN_VEC_EXT_V16QI,
24411 IX86_BUILTIN_VEC_SET_V2DI,
24412 IX86_BUILTIN_VEC_SET_V4SF,
24413 IX86_BUILTIN_VEC_SET_V4SI,
24414 IX86_BUILTIN_VEC_SET_V8HI,
24415 IX86_BUILTIN_VEC_SET_V4HI,
24416 IX86_BUILTIN_VEC_SET_V16QI,
24418 IX86_BUILTIN_VEC_PACK_SFIX,
24421 IX86_BUILTIN_CRC32QI,
24422 IX86_BUILTIN_CRC32HI,
24423 IX86_BUILTIN_CRC32SI,
24424 IX86_BUILTIN_CRC32DI,
24426 IX86_BUILTIN_PCMPESTRI128,
24427 IX86_BUILTIN_PCMPESTRM128,
24428 IX86_BUILTIN_PCMPESTRA128,
24429 IX86_BUILTIN_PCMPESTRC128,
24430 IX86_BUILTIN_PCMPESTRO128,
24431 IX86_BUILTIN_PCMPESTRS128,
24432 IX86_BUILTIN_PCMPESTRZ128,
24433 IX86_BUILTIN_PCMPISTRI128,
24434 IX86_BUILTIN_PCMPISTRM128,
24435 IX86_BUILTIN_PCMPISTRA128,
24436 IX86_BUILTIN_PCMPISTRC128,
24437 IX86_BUILTIN_PCMPISTRO128,
24438 IX86_BUILTIN_PCMPISTRS128,
24439 IX86_BUILTIN_PCMPISTRZ128,
24441 IX86_BUILTIN_PCMPGTQ,
24443 /* AES instructions */
24444 IX86_BUILTIN_AESENC128,
24445 IX86_BUILTIN_AESENCLAST128,
24446 IX86_BUILTIN_AESDEC128,
24447 IX86_BUILTIN_AESDECLAST128,
24448 IX86_BUILTIN_AESIMC128,
24449 IX86_BUILTIN_AESKEYGENASSIST128,
24451 /* PCLMUL instruction */
24452 IX86_BUILTIN_PCLMULQDQ128,
24455 IX86_BUILTIN_ADDPD256,
24456 IX86_BUILTIN_ADDPS256,
24457 IX86_BUILTIN_ADDSUBPD256,
24458 IX86_BUILTIN_ADDSUBPS256,
24459 IX86_BUILTIN_ANDPD256,
24460 IX86_BUILTIN_ANDPS256,
24461 IX86_BUILTIN_ANDNPD256,
24462 IX86_BUILTIN_ANDNPS256,
24463 IX86_BUILTIN_BLENDPD256,
24464 IX86_BUILTIN_BLENDPS256,
24465 IX86_BUILTIN_BLENDVPD256,
24466 IX86_BUILTIN_BLENDVPS256,
24467 IX86_BUILTIN_DIVPD256,
24468 IX86_BUILTIN_DIVPS256,
24469 IX86_BUILTIN_DPPS256,
24470 IX86_BUILTIN_HADDPD256,
24471 IX86_BUILTIN_HADDPS256,
24472 IX86_BUILTIN_HSUBPD256,
24473 IX86_BUILTIN_HSUBPS256,
24474 IX86_BUILTIN_MAXPD256,
24475 IX86_BUILTIN_MAXPS256,
24476 IX86_BUILTIN_MINPD256,
24477 IX86_BUILTIN_MINPS256,
24478 IX86_BUILTIN_MULPD256,
24479 IX86_BUILTIN_MULPS256,
24480 IX86_BUILTIN_ORPD256,
24481 IX86_BUILTIN_ORPS256,
24482 IX86_BUILTIN_SHUFPD256,
24483 IX86_BUILTIN_SHUFPS256,
24484 IX86_BUILTIN_SUBPD256,
24485 IX86_BUILTIN_SUBPS256,
24486 IX86_BUILTIN_XORPD256,
24487 IX86_BUILTIN_XORPS256,
24488 IX86_BUILTIN_CMPSD,
24489 IX86_BUILTIN_CMPSS,
24490 IX86_BUILTIN_CMPPD,
24491 IX86_BUILTIN_CMPPS,
24492 IX86_BUILTIN_CMPPD256,
24493 IX86_BUILTIN_CMPPS256,
24494 IX86_BUILTIN_CVTDQ2PD256,
24495 IX86_BUILTIN_CVTDQ2PS256,
24496 IX86_BUILTIN_CVTPD2PS256,
24497 IX86_BUILTIN_CVTPS2DQ256,
24498 IX86_BUILTIN_CVTPS2PD256,
24499 IX86_BUILTIN_CVTTPD2DQ256,
24500 IX86_BUILTIN_CVTPD2DQ256,
24501 IX86_BUILTIN_CVTTPS2DQ256,
24502 IX86_BUILTIN_EXTRACTF128PD256,
24503 IX86_BUILTIN_EXTRACTF128PS256,
24504 IX86_BUILTIN_EXTRACTF128SI256,
24505 IX86_BUILTIN_VZEROALL,
24506 IX86_BUILTIN_VZEROUPPER,
24507 IX86_BUILTIN_VPERMILVARPD,
24508 IX86_BUILTIN_VPERMILVARPS,
24509 IX86_BUILTIN_VPERMILVARPD256,
24510 IX86_BUILTIN_VPERMILVARPS256,
24511 IX86_BUILTIN_VPERMILPD,
24512 IX86_BUILTIN_VPERMILPS,
24513 IX86_BUILTIN_VPERMILPD256,
24514 IX86_BUILTIN_VPERMILPS256,
24515 IX86_BUILTIN_VPERMIL2PD,
24516 IX86_BUILTIN_VPERMIL2PS,
24517 IX86_BUILTIN_VPERMIL2PD256,
24518 IX86_BUILTIN_VPERMIL2PS256,
24519 IX86_BUILTIN_VPERM2F128PD256,
24520 IX86_BUILTIN_VPERM2F128PS256,
24521 IX86_BUILTIN_VPERM2F128SI256,
24522 IX86_BUILTIN_VBROADCASTSS,
24523 IX86_BUILTIN_VBROADCASTSD256,
24524 IX86_BUILTIN_VBROADCASTSS256,
24525 IX86_BUILTIN_VBROADCASTPD256,
24526 IX86_BUILTIN_VBROADCASTPS256,
24527 IX86_BUILTIN_VINSERTF128PD256,
24528 IX86_BUILTIN_VINSERTF128PS256,
24529 IX86_BUILTIN_VINSERTF128SI256,
24530 IX86_BUILTIN_LOADUPD256,
24531 IX86_BUILTIN_LOADUPS256,
24532 IX86_BUILTIN_STOREUPD256,
24533 IX86_BUILTIN_STOREUPS256,
24534 IX86_BUILTIN_LDDQU256,
24535 IX86_BUILTIN_MOVNTDQ256,
24536 IX86_BUILTIN_MOVNTPD256,
24537 IX86_BUILTIN_MOVNTPS256,
24538 IX86_BUILTIN_LOADDQU256,
24539 IX86_BUILTIN_STOREDQU256,
24540 IX86_BUILTIN_MASKLOADPD,
24541 IX86_BUILTIN_MASKLOADPS,
24542 IX86_BUILTIN_MASKSTOREPD,
24543 IX86_BUILTIN_MASKSTOREPS,
24544 IX86_BUILTIN_MASKLOADPD256,
24545 IX86_BUILTIN_MASKLOADPS256,
24546 IX86_BUILTIN_MASKSTOREPD256,
24547 IX86_BUILTIN_MASKSTOREPS256,
24548 IX86_BUILTIN_MOVSHDUP256,
24549 IX86_BUILTIN_MOVSLDUP256,
24550 IX86_BUILTIN_MOVDDUP256,
24552 IX86_BUILTIN_SQRTPD256,
24553 IX86_BUILTIN_SQRTPS256,
24554 IX86_BUILTIN_SQRTPS_NR256,
24555 IX86_BUILTIN_RSQRTPS256,
24556 IX86_BUILTIN_RSQRTPS_NR256,
24558 IX86_BUILTIN_RCPPS256,
24560 IX86_BUILTIN_ROUNDPD256,
24561 IX86_BUILTIN_ROUNDPS256,
24563 IX86_BUILTIN_FLOORPD256,
24564 IX86_BUILTIN_CEILPD256,
24565 IX86_BUILTIN_TRUNCPD256,
24566 IX86_BUILTIN_RINTPD256,
24567 IX86_BUILTIN_ROUNDPD_AZ256,
24568 IX86_BUILTIN_FLOORPS256,
24569 IX86_BUILTIN_CEILPS256,
24570 IX86_BUILTIN_TRUNCPS256,
24571 IX86_BUILTIN_RINTPS256,
24572 IX86_BUILTIN_ROUNDPS_AZ256,
24574 IX86_BUILTIN_UNPCKHPD256,
24575 IX86_BUILTIN_UNPCKLPD256,
24576 IX86_BUILTIN_UNPCKHPS256,
24577 IX86_BUILTIN_UNPCKLPS256,
24579 IX86_BUILTIN_SI256_SI,
24580 IX86_BUILTIN_PS256_PS,
24581 IX86_BUILTIN_PD256_PD,
24582 IX86_BUILTIN_SI_SI256,
24583 IX86_BUILTIN_PS_PS256,
24584 IX86_BUILTIN_PD_PD256,
24586 IX86_BUILTIN_VTESTZPD,
24587 IX86_BUILTIN_VTESTCPD,
24588 IX86_BUILTIN_VTESTNZCPD,
24589 IX86_BUILTIN_VTESTZPS,
24590 IX86_BUILTIN_VTESTCPS,
24591 IX86_BUILTIN_VTESTNZCPS,
24592 IX86_BUILTIN_VTESTZPD256,
24593 IX86_BUILTIN_VTESTCPD256,
24594 IX86_BUILTIN_VTESTNZCPD256,
24595 IX86_BUILTIN_VTESTZPS256,
24596 IX86_BUILTIN_VTESTCPS256,
24597 IX86_BUILTIN_VTESTNZCPS256,
24598 IX86_BUILTIN_PTESTZ256,
24599 IX86_BUILTIN_PTESTC256,
24600 IX86_BUILTIN_PTESTNZC256,
24602 IX86_BUILTIN_MOVMSKPD256,
24603 IX86_BUILTIN_MOVMSKPS256,
24606 IX86_BUILTIN_MPSADBW256,
24607 IX86_BUILTIN_PABSB256,
24608 IX86_BUILTIN_PABSW256,
24609 IX86_BUILTIN_PABSD256,
24610 IX86_BUILTIN_PACKSSDW256,
24611 IX86_BUILTIN_PACKSSWB256,
24612 IX86_BUILTIN_PACKUSDW256,
24613 IX86_BUILTIN_PACKUSWB256,
24614 IX86_BUILTIN_PADDB256,
24615 IX86_BUILTIN_PADDW256,
24616 IX86_BUILTIN_PADDD256,
24617 IX86_BUILTIN_PADDQ256,
24618 IX86_BUILTIN_PADDSB256,
24619 IX86_BUILTIN_PADDSW256,
24620 IX86_BUILTIN_PADDUSB256,
24621 IX86_BUILTIN_PADDUSW256,
24622 IX86_BUILTIN_PALIGNR256,
24623 IX86_BUILTIN_AND256I,
24624 IX86_BUILTIN_ANDNOT256I,
24625 IX86_BUILTIN_PAVGB256,
24626 IX86_BUILTIN_PAVGW256,
24627 IX86_BUILTIN_PBLENDVB256,
24628 IX86_BUILTIN_PBLENDVW256,
24629 IX86_BUILTIN_PCMPEQB256,
24630 IX86_BUILTIN_PCMPEQW256,
24631 IX86_BUILTIN_PCMPEQD256,
24632 IX86_BUILTIN_PCMPEQQ256,
24633 IX86_BUILTIN_PCMPGTB256,
24634 IX86_BUILTIN_PCMPGTW256,
24635 IX86_BUILTIN_PCMPGTD256,
24636 IX86_BUILTIN_PCMPGTQ256,
24637 IX86_BUILTIN_PHADDW256,
24638 IX86_BUILTIN_PHADDD256,
24639 IX86_BUILTIN_PHADDSW256,
24640 IX86_BUILTIN_PHSUBW256,
24641 IX86_BUILTIN_PHSUBD256,
24642 IX86_BUILTIN_PHSUBSW256,
24643 IX86_BUILTIN_PMADDUBSW256,
24644 IX86_BUILTIN_PMADDWD256,
24645 IX86_BUILTIN_PMAXSB256,
24646 IX86_BUILTIN_PMAXSW256,
24647 IX86_BUILTIN_PMAXSD256,
24648 IX86_BUILTIN_PMAXUB256,
24649 IX86_BUILTIN_PMAXUW256,
24650 IX86_BUILTIN_PMAXUD256,
24651 IX86_BUILTIN_PMINSB256,
24652 IX86_BUILTIN_PMINSW256,
24653 IX86_BUILTIN_PMINSD256,
24654 IX86_BUILTIN_PMINUB256,
24655 IX86_BUILTIN_PMINUW256,
24656 IX86_BUILTIN_PMINUD256,
24657 IX86_BUILTIN_PMOVMSKB256,
24658 IX86_BUILTIN_PMOVSXBW256,
24659 IX86_BUILTIN_PMOVSXBD256,
24660 IX86_BUILTIN_PMOVSXBQ256,
24661 IX86_BUILTIN_PMOVSXWD256,
24662 IX86_BUILTIN_PMOVSXWQ256,
24663 IX86_BUILTIN_PMOVSXDQ256,
24664 IX86_BUILTIN_PMOVZXBW256,
24665 IX86_BUILTIN_PMOVZXBD256,
24666 IX86_BUILTIN_PMOVZXBQ256,
24667 IX86_BUILTIN_PMOVZXWD256,
24668 IX86_BUILTIN_PMOVZXWQ256,
24669 IX86_BUILTIN_PMOVZXDQ256,
24670 IX86_BUILTIN_PMULDQ256,
24671 IX86_BUILTIN_PMULHRSW256,
24672 IX86_BUILTIN_PMULHUW256,
24673 IX86_BUILTIN_PMULHW256,
24674 IX86_BUILTIN_PMULLW256,
24675 IX86_BUILTIN_PMULLD256,
24676 IX86_BUILTIN_PMULUDQ256,
24677 IX86_BUILTIN_POR256,
24678 IX86_BUILTIN_PSADBW256,
24679 IX86_BUILTIN_PSHUFB256,
24680 IX86_BUILTIN_PSHUFD256,
24681 IX86_BUILTIN_PSHUFHW256,
24682 IX86_BUILTIN_PSHUFLW256,
24683 IX86_BUILTIN_PSIGNB256,
24684 IX86_BUILTIN_PSIGNW256,
24685 IX86_BUILTIN_PSIGND256,
24686 IX86_BUILTIN_PSLLDQI256,
24687 IX86_BUILTIN_PSLLWI256,
24688 IX86_BUILTIN_PSLLW256,
24689 IX86_BUILTIN_PSLLDI256,
24690 IX86_BUILTIN_PSLLD256,
24691 IX86_BUILTIN_PSLLQI256,
24692 IX86_BUILTIN_PSLLQ256,
24693 IX86_BUILTIN_PSRAWI256,
24694 IX86_BUILTIN_PSRAW256,
24695 IX86_BUILTIN_PSRADI256,
24696 IX86_BUILTIN_PSRAD256,
24697 IX86_BUILTIN_PSRLDQI256,
24698 IX86_BUILTIN_PSRLWI256,
24699 IX86_BUILTIN_PSRLW256,
24700 IX86_BUILTIN_PSRLDI256,
24701 IX86_BUILTIN_PSRLD256,
24702 IX86_BUILTIN_PSRLQI256,
24703 IX86_BUILTIN_PSRLQ256,
24704 IX86_BUILTIN_PSUBB256,
24705 IX86_BUILTIN_PSUBW256,
24706 IX86_BUILTIN_PSUBD256,
24707 IX86_BUILTIN_PSUBQ256,
24708 IX86_BUILTIN_PSUBSB256,
24709 IX86_BUILTIN_PSUBSW256,
24710 IX86_BUILTIN_PSUBUSB256,
24711 IX86_BUILTIN_PSUBUSW256,
24712 IX86_BUILTIN_PUNPCKHBW256,
24713 IX86_BUILTIN_PUNPCKHWD256,
24714 IX86_BUILTIN_PUNPCKHDQ256,
24715 IX86_BUILTIN_PUNPCKHQDQ256,
24716 IX86_BUILTIN_PUNPCKLBW256,
24717 IX86_BUILTIN_PUNPCKLWD256,
24718 IX86_BUILTIN_PUNPCKLDQ256,
24719 IX86_BUILTIN_PUNPCKLQDQ256,
24720 IX86_BUILTIN_PXOR256,
24721 IX86_BUILTIN_MOVNTDQA256,
24722 IX86_BUILTIN_VBROADCASTSS_PS,
24723 IX86_BUILTIN_VBROADCASTSS_PS256,
24724 IX86_BUILTIN_VBROADCASTSD_PD256,
24725 IX86_BUILTIN_VBROADCASTSI256,
24726 IX86_BUILTIN_PBLENDD256,
24727 IX86_BUILTIN_PBLENDD128,
24728 IX86_BUILTIN_PBROADCASTB256,
24729 IX86_BUILTIN_PBROADCASTW256,
24730 IX86_BUILTIN_PBROADCASTD256,
24731 IX86_BUILTIN_PBROADCASTQ256,
24732 IX86_BUILTIN_PBROADCASTB128,
24733 IX86_BUILTIN_PBROADCASTW128,
24734 IX86_BUILTIN_PBROADCASTD128,
24735 IX86_BUILTIN_PBROADCASTQ128,
24736 IX86_BUILTIN_VPERMVARSI256,
24737 IX86_BUILTIN_VPERMDF256,
24738 IX86_BUILTIN_VPERMVARSF256,
24739 IX86_BUILTIN_VPERMDI256,
24740 IX86_BUILTIN_VPERMTI256,
24741 IX86_BUILTIN_VEXTRACT128I256,
24742 IX86_BUILTIN_VINSERT128I256,
24743 IX86_BUILTIN_MASKLOADD,
24744 IX86_BUILTIN_MASKLOADQ,
24745 IX86_BUILTIN_MASKLOADD256,
24746 IX86_BUILTIN_MASKLOADQ256,
24747 IX86_BUILTIN_MASKSTORED,
24748 IX86_BUILTIN_MASKSTOREQ,
24749 IX86_BUILTIN_MASKSTORED256,
24750 IX86_BUILTIN_MASKSTOREQ256,
24751 IX86_BUILTIN_PSLLVV4DI,
24752 IX86_BUILTIN_PSLLVV2DI,
24753 IX86_BUILTIN_PSLLVV8SI,
24754 IX86_BUILTIN_PSLLVV4SI,
24755 IX86_BUILTIN_PSRAVV8SI,
24756 IX86_BUILTIN_PSRAVV4SI,
24757 IX86_BUILTIN_PSRLVV4DI,
24758 IX86_BUILTIN_PSRLVV2DI,
24759 IX86_BUILTIN_PSRLVV8SI,
24760 IX86_BUILTIN_PSRLVV4SI,
24762 IX86_BUILTIN_GATHERSIV2DF,
24763 IX86_BUILTIN_GATHERSIV4DF,
24764 IX86_BUILTIN_GATHERDIV2DF,
24765 IX86_BUILTIN_GATHERDIV4DF,
24766 IX86_BUILTIN_GATHERSIV4SF,
24767 IX86_BUILTIN_GATHERSIV8SF,
24768 IX86_BUILTIN_GATHERDIV4SF,
24769 IX86_BUILTIN_GATHERDIV8SF,
24770 IX86_BUILTIN_GATHERSIV2DI,
24771 IX86_BUILTIN_GATHERSIV4DI,
24772 IX86_BUILTIN_GATHERDIV2DI,
24773 IX86_BUILTIN_GATHERDIV4DI,
24774 IX86_BUILTIN_GATHERSIV4SI,
24775 IX86_BUILTIN_GATHERSIV8SI,
24776 IX86_BUILTIN_GATHERDIV4SI,
24777 IX86_BUILTIN_GATHERDIV8SI,
24779 /* TFmode support builtins. */
24780 IX86_BUILTIN_INFQ,
24781 IX86_BUILTIN_HUGE_VALQ,
24782 IX86_BUILTIN_FABSQ,
24783 IX86_BUILTIN_COPYSIGNQ,
24785 /* Vectorizer support builtins. */
24786 IX86_BUILTIN_CPYSGNPS,
24787 IX86_BUILTIN_CPYSGNPD,
24788 IX86_BUILTIN_CPYSGNPS256,
24789 IX86_BUILTIN_CPYSGNPD256,
24791 IX86_BUILTIN_CVTUDQ2PS,
24793 IX86_BUILTIN_VEC_PERM_V2DF,
24794 IX86_BUILTIN_VEC_PERM_V4SF,
24795 IX86_BUILTIN_VEC_PERM_V2DI,
24796 IX86_BUILTIN_VEC_PERM_V4SI,
24797 IX86_BUILTIN_VEC_PERM_V8HI,
24798 IX86_BUILTIN_VEC_PERM_V16QI,
24799 IX86_BUILTIN_VEC_PERM_V2DI_U,
24800 IX86_BUILTIN_VEC_PERM_V4SI_U,
24801 IX86_BUILTIN_VEC_PERM_V8HI_U,
24802 IX86_BUILTIN_VEC_PERM_V16QI_U,
24803 IX86_BUILTIN_VEC_PERM_V4DF,
24804 IX86_BUILTIN_VEC_PERM_V8SF,
24806 /* FMA4 instructions. */
24807 IX86_BUILTIN_VFMADDSS,
24808 IX86_BUILTIN_VFMADDSD,
24809 IX86_BUILTIN_VFMADDPS,
24810 IX86_BUILTIN_VFMADDPD,
24811 IX86_BUILTIN_VFMADDPS256,
24812 IX86_BUILTIN_VFMADDPD256,
24813 IX86_BUILTIN_VFMADDSUBPS,
24814 IX86_BUILTIN_VFMADDSUBPD,
24815 IX86_BUILTIN_VFMADDSUBPS256,
24816 IX86_BUILTIN_VFMADDSUBPD256,
24818 /* FMA3 instructions. */
24819 IX86_BUILTIN_VFMADDSS3,
24820 IX86_BUILTIN_VFMADDSD3,
24822 /* XOP instructions. */
24823 IX86_BUILTIN_VPCMOV,
24824 IX86_BUILTIN_VPCMOV_V2DI,
24825 IX86_BUILTIN_VPCMOV_V4SI,
24826 IX86_BUILTIN_VPCMOV_V8HI,
24827 IX86_BUILTIN_VPCMOV_V16QI,
24828 IX86_BUILTIN_VPCMOV_V4SF,
24829 IX86_BUILTIN_VPCMOV_V2DF,
24830 IX86_BUILTIN_VPCMOV256,
24831 IX86_BUILTIN_VPCMOV_V4DI256,
24832 IX86_BUILTIN_VPCMOV_V8SI256,
24833 IX86_BUILTIN_VPCMOV_V16HI256,
24834 IX86_BUILTIN_VPCMOV_V32QI256,
24835 IX86_BUILTIN_VPCMOV_V8SF256,
24836 IX86_BUILTIN_VPCMOV_V4DF256,
24838 IX86_BUILTIN_VPPERM,
24840 IX86_BUILTIN_VPMACSSWW,
24841 IX86_BUILTIN_VPMACSWW,
24842 IX86_BUILTIN_VPMACSSWD,
24843 IX86_BUILTIN_VPMACSWD,
24844 IX86_BUILTIN_VPMACSSDD,
24845 IX86_BUILTIN_VPMACSDD,
24846 IX86_BUILTIN_VPMACSSDQL,
24847 IX86_BUILTIN_VPMACSSDQH,
24848 IX86_BUILTIN_VPMACSDQL,
24849 IX86_BUILTIN_VPMACSDQH,
24850 IX86_BUILTIN_VPMADCSSWD,
24851 IX86_BUILTIN_VPMADCSWD,
24853 IX86_BUILTIN_VPHADDBW,
24854 IX86_BUILTIN_VPHADDBD,
24855 IX86_BUILTIN_VPHADDBQ,
24856 IX86_BUILTIN_VPHADDWD,
24857 IX86_BUILTIN_VPHADDWQ,
24858 IX86_BUILTIN_VPHADDDQ,
24859 IX86_BUILTIN_VPHADDUBW,
24860 IX86_BUILTIN_VPHADDUBD,
24861 IX86_BUILTIN_VPHADDUBQ,
24862 IX86_BUILTIN_VPHADDUWD,
24863 IX86_BUILTIN_VPHADDUWQ,
24864 IX86_BUILTIN_VPHADDUDQ,
24865 IX86_BUILTIN_VPHSUBBW,
24866 IX86_BUILTIN_VPHSUBWD,
24867 IX86_BUILTIN_VPHSUBDQ,
24869 IX86_BUILTIN_VPROTB,
24870 IX86_BUILTIN_VPROTW,
24871 IX86_BUILTIN_VPROTD,
24872 IX86_BUILTIN_VPROTQ,
24873 IX86_BUILTIN_VPROTB_IMM,
24874 IX86_BUILTIN_VPROTW_IMM,
24875 IX86_BUILTIN_VPROTD_IMM,
24876 IX86_BUILTIN_VPROTQ_IMM,
24878 IX86_BUILTIN_VPSHLB,
24879 IX86_BUILTIN_VPSHLW,
24880 IX86_BUILTIN_VPSHLD,
24881 IX86_BUILTIN_VPSHLQ,
24882 IX86_BUILTIN_VPSHAB,
24883 IX86_BUILTIN_VPSHAW,
24884 IX86_BUILTIN_VPSHAD,
24885 IX86_BUILTIN_VPSHAQ,
24887 IX86_BUILTIN_VFRCZSS,
24888 IX86_BUILTIN_VFRCZSD,
24889 IX86_BUILTIN_VFRCZPS,
24890 IX86_BUILTIN_VFRCZPD,
24891 IX86_BUILTIN_VFRCZPS256,
24892 IX86_BUILTIN_VFRCZPD256,
24894 IX86_BUILTIN_VPCOMEQUB,
24895 IX86_BUILTIN_VPCOMNEUB,
24896 IX86_BUILTIN_VPCOMLTUB,
24897 IX86_BUILTIN_VPCOMLEUB,
24898 IX86_BUILTIN_VPCOMGTUB,
24899 IX86_BUILTIN_VPCOMGEUB,
24900 IX86_BUILTIN_VPCOMFALSEUB,
24901 IX86_BUILTIN_VPCOMTRUEUB,
24903 IX86_BUILTIN_VPCOMEQUW,
24904 IX86_BUILTIN_VPCOMNEUW,
24905 IX86_BUILTIN_VPCOMLTUW,
24906 IX86_BUILTIN_VPCOMLEUW,
24907 IX86_BUILTIN_VPCOMGTUW,
24908 IX86_BUILTIN_VPCOMGEUW,
24909 IX86_BUILTIN_VPCOMFALSEUW,
24910 IX86_BUILTIN_VPCOMTRUEUW,
24912 IX86_BUILTIN_VPCOMEQUD,
24913 IX86_BUILTIN_VPCOMNEUD,
24914 IX86_BUILTIN_VPCOMLTUD,
24915 IX86_BUILTIN_VPCOMLEUD,
24916 IX86_BUILTIN_VPCOMGTUD,
24917 IX86_BUILTIN_VPCOMGEUD,
24918 IX86_BUILTIN_VPCOMFALSEUD,
24919 IX86_BUILTIN_VPCOMTRUEUD,
24921 IX86_BUILTIN_VPCOMEQUQ,
24922 IX86_BUILTIN_VPCOMNEUQ,
24923 IX86_BUILTIN_VPCOMLTUQ,
24924 IX86_BUILTIN_VPCOMLEUQ,
24925 IX86_BUILTIN_VPCOMGTUQ,
24926 IX86_BUILTIN_VPCOMGEUQ,
24927 IX86_BUILTIN_VPCOMFALSEUQ,
24928 IX86_BUILTIN_VPCOMTRUEUQ,
24930 IX86_BUILTIN_VPCOMEQB,
24931 IX86_BUILTIN_VPCOMNEB,
24932 IX86_BUILTIN_VPCOMLTB,
24933 IX86_BUILTIN_VPCOMLEB,
24934 IX86_BUILTIN_VPCOMGTB,
24935 IX86_BUILTIN_VPCOMGEB,
24936 IX86_BUILTIN_VPCOMFALSEB,
24937 IX86_BUILTIN_VPCOMTRUEB,
24939 IX86_BUILTIN_VPCOMEQW,
24940 IX86_BUILTIN_VPCOMNEW,
24941 IX86_BUILTIN_VPCOMLTW,
24942 IX86_BUILTIN_VPCOMLEW,
24943 IX86_BUILTIN_VPCOMGTW,
24944 IX86_BUILTIN_VPCOMGEW,
24945 IX86_BUILTIN_VPCOMFALSEW,
24946 IX86_BUILTIN_VPCOMTRUEW,
24948 IX86_BUILTIN_VPCOMEQD,
24949 IX86_BUILTIN_VPCOMNED,
24950 IX86_BUILTIN_VPCOMLTD,
24951 IX86_BUILTIN_VPCOMLED,
24952 IX86_BUILTIN_VPCOMGTD,
24953 IX86_BUILTIN_VPCOMGED,
24954 IX86_BUILTIN_VPCOMFALSED,
24955 IX86_BUILTIN_VPCOMTRUED,
24957 IX86_BUILTIN_VPCOMEQQ,
24958 IX86_BUILTIN_VPCOMNEQ,
24959 IX86_BUILTIN_VPCOMLTQ,
24960 IX86_BUILTIN_VPCOMLEQ,
24961 IX86_BUILTIN_VPCOMGTQ,
24962 IX86_BUILTIN_VPCOMGEQ,
24963 IX86_BUILTIN_VPCOMFALSEQ,
24964 IX86_BUILTIN_VPCOMTRUEQ,
24966 /* LWP instructions. */
24967 IX86_BUILTIN_LLWPCB,
24968 IX86_BUILTIN_SLWPCB,
24969 IX86_BUILTIN_LWPVAL32,
24970 IX86_BUILTIN_LWPVAL64,
24971 IX86_BUILTIN_LWPINS32,
24972 IX86_BUILTIN_LWPINS64,
24974 IX86_BUILTIN_CLZS,
24976 /* BMI instructions. */
24977 IX86_BUILTIN_BEXTR32,
24978 IX86_BUILTIN_BEXTR64,
24979 IX86_BUILTIN_CTZS,
24981 /* TBM instructions. */
24982 IX86_BUILTIN_BEXTRI32,
24983 IX86_BUILTIN_BEXTRI64,
24985 /* BMI2 instructions. */
24986 IX86_BUILTIN_BZHI32,
24987 IX86_BUILTIN_BZHI64,
24988 IX86_BUILTIN_PDEP32,
24989 IX86_BUILTIN_PDEP64,
24990 IX86_BUILTIN_PEXT32,
24991 IX86_BUILTIN_PEXT64,
24993 /* FSGSBASE instructions. */
24994 IX86_BUILTIN_RDFSBASE32,
24995 IX86_BUILTIN_RDFSBASE64,
24996 IX86_BUILTIN_RDGSBASE32,
24997 IX86_BUILTIN_RDGSBASE64,
24998 IX86_BUILTIN_WRFSBASE32,
24999 IX86_BUILTIN_WRFSBASE64,
25000 IX86_BUILTIN_WRGSBASE32,
25001 IX86_BUILTIN_WRGSBASE64,
25003 /* RDRND instructions. */
25004 IX86_BUILTIN_RDRAND16_STEP,
25005 IX86_BUILTIN_RDRAND32_STEP,
25006 IX86_BUILTIN_RDRAND64_STEP,
25008 /* F16C instructions. */
25009 IX86_BUILTIN_CVTPH2PS,
25010 IX86_BUILTIN_CVTPH2PS256,
25011 IX86_BUILTIN_CVTPS2PH,
25012 IX86_BUILTIN_CVTPS2PH256,
25014 /* CFString built-in for darwin */
25015 IX86_BUILTIN_CFSTRING,
25017 IX86_BUILTIN_MAX
25018 };
25020 /* Table for the ix86 builtin decls. */
25021 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
25023 /* Table of all of the builtin functions that are possible with different ISAs
25024    but are waiting to be built until a function is declared to use that
25025    ISA.  */
25026 struct builtin_isa {
25027 const char *name; /* function name */
25028 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
25029 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
25030 bool const_p; /* true if the declaration is constant */
25031 bool set_and_not_built_p;
25032 };
25034 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
25037 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
25038    of which isa_flags to use in the ix86_builtins_isa array.  Store the
25039    function decl in the ix86_builtins array.  Return the function decl,
25040    or NULL_TREE if the builtin was not added.
25042    If the front end has a special hook for builtin functions, delay adding
25043    builtin functions that aren't in the current ISA until the ISA is changed
25044    with function specific optimization.  Doing so can save about 300K for the
25045    default compiler.  When the builtin is expanded, check at that time whether
25046    it is valid.
25048    If the front end doesn't have a special hook, record all builtins, even if
25049    they aren't in the current ISA, in case the user uses function specific
25050    options for a different ISA, so that we don't get scope errors if a
25051    builtin is added in the middle of a function scope.  */
25053 static inline tree
25054 def_builtin (HOST_WIDE_INT mask, const char *name,
25055              enum ix86_builtin_func_type tcode,
25056              enum ix86_builtins code)
25057 {
25058   tree decl = NULL_TREE;
25060   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
25061     {
25062       ix86_builtins_isa[(int) code].isa = mask;
25064       mask &= ~OPTION_MASK_ISA_64BIT;
25065       if (mask == 0
25066           || (mask & ix86_isa_flags) != 0
25067           || (lang_hooks.builtin_function
25068               == lang_hooks.builtin_function_ext_scope))
25070         {
25071           tree type = ix86_get_builtin_func_type (tcode);
25072           decl = add_builtin_function (name, type, code, BUILT_IN_MD,
25073                                        NULL, NULL_TREE);
25074           ix86_builtins[(int) code] = decl;
25075           ix86_builtins_isa[(int) code].set_and_not_built_p = false;
25076         }
25077       else
25078         {
25079           ix86_builtins[(int) code] = NULL_TREE;
25080           ix86_builtins_isa[(int) code].tcode = tcode;
25081           ix86_builtins_isa[(int) code].name = name;
25082           ix86_builtins_isa[(int) code].const_p = false;
25083           ix86_builtins_isa[(int) code].set_and_not_built_p = true;
25084         }
25085     }
25087   return decl;
25088 }
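/* A minimal usage sketch: the table-driven registration later in this file
   boils down to calls of the following shape (an equivalent form for
   illustration, not a literal quote; the const variants go through
   def_builtin_const below):

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_addpd",
                  V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_ADDPD);

   With -msse2 in effect (or with a front end whose builtin hook is the
   extension-scope one) the decl is built immediately; otherwise the
   name/type/ISA triple is parked in ix86_builtins_isa with
   set_and_not_built_p set, to be materialized later.  */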
25090 /* Like def_builtin, but also marks the function decl "const".  */
25092 static inline tree
25093 def_builtin_const (HOST_WIDE_INT mask, const char *name,
25094                    enum ix86_builtin_func_type tcode, enum ix86_builtins code)
25095 {
25096   tree decl = def_builtin (mask, name, tcode, code);
25097   if (decl)
25098     TREE_READONLY (decl) = 1;
25099   else
25100     ix86_builtins_isa[(int) code].const_p = true;
25102   return decl;
25103 }
25105 /* Add any new builtin functions for a given ISA that may not have been
25106    declared.  This saves a bit of space compared to adding all of the
25107    declarations to the tree, even if we didn't use them.  */
25109 static void
25110 ix86_add_new_builtins (HOST_WIDE_INT isa)
25111 {
25112   int i;
25114   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
25115     {
25116       if ((ix86_builtins_isa[i].isa & isa) != 0
25117           && ix86_builtins_isa[i].set_and_not_built_p)
25118         {
25119           tree decl, type;
25121           /* Don't define the builtin again.  */
25122           ix86_builtins_isa[i].set_and_not_built_p = false;
25124           type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
25125           decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
25126                                                  type, i, BUILT_IN_MD, NULL,
25127                                                  NULL_TREE);
25129           ix86_builtins[i] = decl;
25130           if (ix86_builtins_isa[i].const_p)
25131             TREE_READONLY (decl) = 1;
25132         }
25133     }
25134 }
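/* Sketch of the expected trigger (an assumption about the surrounding file,
   not shown in this excerpt): when function-specific target options enable
   a new ISA, e.g. while processing __attribute__((target("avx"))), the
   options code re-runs this pass over the pending builtins:

     ix86_add_new_builtins (ix86_isa_flags);

   Only entries whose ISA bits intersect ISA and which are still marked
   set_and_not_built_p get a decl, so repeated calls are cheap.  */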
25136 /* Bits for builtin_description.flag. */
25138 /* Set when we don't support the comparison natively, and should
25139    swap the comparison operands in order to support it.  */
25140 #define BUILTIN_DESC_SWAP_OPERANDS 1
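/* Expansion-time consumption sketch (assumed shape; the actual test lives
   in the SSE compare expanders elsewhere in this file):

     if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
       {
         rtx tmp = op0;
         op0 = op1;
         op1 = tmp;
       }

   i.e. a comparison the hardware only implements with the operands the
   other way around is handled by swapping them before emitting the
   pattern.  */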
25142 struct builtin_description
25143 {
25144   const HOST_WIDE_INT mask;
25145   const enum insn_code icode;
25146   const char *const name;
25147   const enum ix86_builtins code;
25148   const enum rtx_code comparison;
25149   const int flag;
25150 };
25152 static const struct builtin_description bdesc_comi[] =
25153 {
25154 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
25155 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
25156 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
25157 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
25158 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
25159 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
25160 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
25161 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
25162 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
25163 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
25164 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
25165 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
25166 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
25167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
25168 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
25169 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
25170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
25171 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
25172 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
25173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
25174 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
25175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
25176 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
25177 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
25178 };
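/* How one of these entries surfaces to user code (hypothetical test
   program, not part of GCC; <xmmintrin.h> wraps this builtin as
   _mm_comigt_ss):

     int gt (__m128 a, __m128 b)
     {
       return __builtin_ia32_comigt (a, b);
     }

   The GT rtx_code recorded in the table tells the expander which flag
   combination to test after emitting the comi pattern.  */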
25180 static const struct builtin_description bdesc_pcmpestr[] =
25181 {
25183 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
25184 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
25185 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
25186 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
25187 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
25188 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
25189 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
25190 };
25192 static const struct builtin_description bdesc_pcmpistr[] =
25193 {
25195 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
25196 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
25197 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
25198 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
25199 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
25200 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
25201 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
25202 };
25204 /* Special builtins with variable number of arguments.  */
25205 static const struct builtin_description bdesc_special_args[] =
25206 {
25207 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
25208 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
25209 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
25212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
25215 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
25218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25219 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25220 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
25222 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
25223 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
25224 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
25225 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
25227 /* SSE or 3DNow!A */
25228 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
25229 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
25232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
25233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
25234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
25235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
25236 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
25237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
25238 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
25239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
25240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
25242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
25243 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
25246 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
25249 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
25252 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
25253 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25256 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
25257 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
25259 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
25260 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
25261 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
25262 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
25263 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
25265 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
25266 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
25267 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
25268 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
25269 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
25270 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
25271 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
25273 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
25274 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
25275 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
25277 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
25278 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
25279 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
25280 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
25281 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
25282 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
25283 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
25284 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
25287 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
25288 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
25289 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
25290 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
25291 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
25292 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
25293 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
25294 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
25295 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
25297 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
25298 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
25299 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
25300 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
25301 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
25302 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
25305 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
25306 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
25307 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
25308 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
25309 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
25310 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
25311 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
25312 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
25313 };
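/* The final field of each entry above is an ix86_builtin_func_type code
   from which the expander recovers the return and operand modes.  As an
   illustration (signature inferred from the type code, not quoted from
   documentation), VOID_FTYPE_PFLOAT_V4SF corresponds to

     void __builtin_ia32_storeups (float *, __v4sf);

   so operand 0 is expanded as a memory destination rather than a
   register.  */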
25315 /* Builtins with variable number of arguments.  */
25316 static const struct builtin_description bdesc_args[] =
25317 {
25318 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
25319 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
25320 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
25321 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
25322 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
25323 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
25324 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
25327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25328 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25329 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25330 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25331 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25332 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25334 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25337 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25338 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25339 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25341 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25343 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25348 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25351 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25352 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25353 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25359 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25365 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
25366 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
25367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
25369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
25371 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
25372 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
25373 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
25374 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
25375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
25376 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
25378 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
25379 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
25380 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
25381 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
25382 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
25383 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
25385 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
25386 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
25387 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
25388 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
25391 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
25392 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
25393 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
25394 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
25396 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25397 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25398 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25399 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
25400 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
25401 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
25402 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25403 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25404 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25405 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25406 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25407 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25408 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25409 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25410 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25413 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
25414 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
25415 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
25416 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
25417 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25418 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25421 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
25422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25423 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25424 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25425 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25426 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25427 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
25428 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
25429 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
25430 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
25431 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
25432 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
25434 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25436 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25437 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25438 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25439 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25440 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25441 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25442 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25443 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25445 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
25446 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
25447 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
25448 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25449 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25450 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
25451 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
25452 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
25453 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
25454 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25455 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25456 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
25457 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
25458 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
25459 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
25460 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
25461 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
25462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
25463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
25464 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25465 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
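
/* A note on the comparison rows above (a summary of how this table is
   consumed; see ix86_expand_args_builtin): the "greater" compares reuse
   the LT/LE patterns with a _SWAP function type, which tells the
   expander to swap the two operands so that "a > b" is emitted as
   "b < a".  The negated compares (cmpnlt, cmpnle, cmpngt, cmpnge) use
   the unordered codes UNGE/UNGT, since NOT (a < b) must also hold when
   the operands are unordered.  */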
{ OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

{ OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
/* SSE MMX or 3DNow!A */
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
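
/* The rows above OR two ISA masks together.  def_builtin treats
   multiple ISA bits as alternatives (any one of them enables the
   builtin), so these MMX forms are available under either SSE or
   3DNow!A.  OPTION_MASK_ISA_64BIT is the one exception: it is an
   additional requirement on top of the other bits, not an
   alternative.  */
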
/* SSE2 */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
{ OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
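
/* The vec_perm rows use CODE_FOR_nothing: no single insn pattern
   implements a variable three-operand permute, so the expander
   special-cases these builtin codes instead of mapping them to an
   instruction through this table.  */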
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
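
/* The shift rows above come in pairs sharing one insn pattern: the *i
   forms take the count as a scalar or immediate (the _SI_COUNT
   function types), while the other forms take it in the low quadword
   of a vector (the _V8HI/_V4SI/_V2DI_COUNT types).  The whole-register
   byte shifts pslldqi128/psrldqi128 are mapped onto the V1TImode shift
   patterns; their _INT_CONVERT suffix asks the expander to convert the
   V2DI operand to the mode the insn actually uses.  */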
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
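
/* Rows with a null name (FABSQ and COPYSIGNQ here, the AES and PCLMUL
   rows below) do not register a __builtin_ia32_* identifier from this
   table; the builtin is declared separately elsewhere in this file,
   and the row only supplies the insn code and signature used when the
   call is expanded.  */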
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

/* SSE2 MMX */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

/* SSE3 */
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

/* SSSE3 */
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

/* SSE4.1 */
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
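
/* For the floor/ceil/trunc/rint rows above the comparison-code slot is
   overloaded: it carries a ROUND_* immediate (cast to enum rtx_code)
   which the expander passes to the same sse4_1_roundpd/roundps
   patterns; the _ROUND function types select that treatment.  */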
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
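
/* The ptest rows encode which flag of the PTEST result to test in the
   comparison code: EQ tests ZF (ptestz), LTU tests CF (ptestc), and
   GTU tests that neither flag is set (ptestnzc).  The AVX vtest and
   ptest256 rows further down follow the same convention.  */
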
/* SSE4.2 */
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
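
/* For illustration only: the crc32 builtins accumulate a CRC-32C
   value one element at a time and are what the
   _mm_crc32_u8/_u16/_u32/_u64 intrinsics in smmintrin.h expand to,
   roughly:

     unsigned int crc = 0xffffffff;
     crc = __builtin_ia32_crc32qi (crc, byte);

   The 64-bit form additionally requires OPTION_MASK_ISA_64BIT, i.e.
   TARGET_64BIT.  */
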
/* SSE4A */
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

/* AES */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

/* PCLMUL */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

/* AVX */
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25961 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
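
  /* For the vtest and ptest rows above, the rtx_code field selects which
     EFLAGS bit the builtin tests: EQ maps to ZF (the *z forms), LTU to CF
     (the *c forms), and GTU to "ZF and CF both clear" (the *nzc forms).  */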

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv4di, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlqv4di3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrqv4di3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* BMI */
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* TBM */
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },

  /* BMI2 */
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};
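
/* A usage sketch (illustration only, not part of the compiler): once its ISA
   bit is enabled, each row above is registered as a user-callable builtin by
   ix86_init_mmx_sse_builtins below.  For example, compiling with -mbmi2 the
   PDEP/PEXT rows surface as

       unsigned int scattered = __builtin_ia32_pdep_si (bits, 0xf0f0f0f0);
       unsigned int packed = __builtin_ia32_pext_si (bits, 0xf0f0f0f0);

   where "bits" stands for any caller-supplied value; both builtins take and
   return unsigned int, matching UINT_FTYPE_UINT_UINT.  */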

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I     V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1    V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I     V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1    V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF           V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF           V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2          V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2          V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI           V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI           V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI        V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI           V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI        V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI           V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2          V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2          V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2          V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2          V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF           V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF           V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI           V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI           V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI           V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI           V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM       V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM       V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM       V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM       V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP       V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP       V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP       V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP       V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF        V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF        V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF        V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF        V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF        V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF        V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF           V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF           V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2          V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2          V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI           V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI           V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI           V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI           V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI        V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI        V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI        V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI        V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI        V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI        V8HI_FTYPE_V16QI
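
/* Each MULTI_ARG_* name above is simply a shorthand alias for one of the
   ix86_builtin_func_type values, so a table row tagged MULTI_ARG_3_SF
   describes a builtin taking three V4SF operands and returning V4SF, as
   __builtin_ia32_vfmaddss below does.  */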

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },
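
  /* A usage sketch (illustration only): with -mfma4 the rows above surface
     as, e.g.,

         __v4sf r = __builtin_ia32_vfmaddps (a, b, c);

     which computes a*b + c elementwise, while __builtin_ia32_vfmaddsubps
     subtracts c in the even elements and adds it in the odd ones; a, b and
     c stand for caller-supplied __v4sf values.  */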

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
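
  /* Reading aid: each vpcom row yields a per-element mask, all-ones where
     the comparison holds and all-zeros elsewhere.  A usage sketch
     (illustration only), with -mxop:

         __v4si m = __builtin_ia32_vpcomltd (x, y);

     where x and y stand for caller-supplied __v4si values.  */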

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};

/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
        ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
        ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
        ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
        ftype = INT_FTYPE_V2DF_V2DF;
      else
        ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
               UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
               "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
               IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
               VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
               VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
                            VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3 */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
               VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
               VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
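
  /* Usage sketch (illustration only): with -msse3 these pair up as

         __builtin_ia32_monitor (addr, 0, 0);
         __builtin_ia32_mwait (0, 0);

     arming the monitor hardware on the cache line containing addr (a
     caller-supplied pointer) and then waiting for a store to it.  */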

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
                     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
                     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
26509 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
26510 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
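  /* Illustrative sketch (assumed wmmintrin.h wrappers, shown only to
     make the mapping concrete):

       state = _mm_aesenc_si128 (state, round_key);   one AES round
       prod  = _mm_clmulepi64_si128 (a, b, 0x00);     carry-less multiply
                                                      of the low halves  */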
26513 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
26514 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
26515 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
26516 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
26517 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
26518 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
26519 IX86_BUILTIN_RDRAND64_STEP);
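  /* Illustrative usage sketch (the retry loop is an assumption, not
     mandated by the builtin): each *_step builtin stores a hardware
     random value through its pointer argument and returns nonzero on
     success, so callers typically retry on failure:

       unsigned int r;
       while (!__builtin_ia32_rdrand32_step (&r))
         ;    carry flag was clear: no entropy delivered yet  */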
26522 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
26523 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
26524 IX86_BUILTIN_GATHERSIV2DF);
26526 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
26527 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
26528 IX86_BUILTIN_GATHERSIV4DF);
26530 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
26531 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
26532 IX86_BUILTIN_GATHERDIV2DF);
26534 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
26535 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
26536 IX86_BUILTIN_GATHERDIV4DF);
26538 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
26539 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
26540 IX86_BUILTIN_GATHERSIV4SF);
26542 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
26543 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
26544 IX86_BUILTIN_GATHERSIV8SF);
26546 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
26547 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
26548 IX86_BUILTIN_GATHERDIV4SF);
26550 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
26551 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
26552 IX86_BUILTIN_GATHERDIV8SF);
26554 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
26555 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
26556 IX86_BUILTIN_GATHERSIV2DI);
26558 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
26559 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
26560 IX86_BUILTIN_GATHERSIV4DI);
26562 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
26563 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
26564 IX86_BUILTIN_GATHERDIV2DI);
26566 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
26567 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
26568 IX86_BUILTIN_GATHERDIV4DI);
26570 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
26571 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
26572 IX86_BUILTIN_GATHERSIV4SI);
26574 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
26575 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
26576 IX86_BUILTIN_GATHERSIV8SI);
26578 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
26579 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
26580 IX86_BUILTIN_GATHERDIV4SI);
26582 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
26583 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
26584 IX86_BUILTIN_GATHERDIV8SI);
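  /* Illustrative sketch of the shared gather signature (an
     interpretation, for orientation only): each builtin takes
     (src, base, index, mask, scale) and loads base[index[i] * scale]
     for every element whose mask sign bit is set, copying the
     remaining elements from src, e.g.

       res = __builtin_ia32_gathersiv4sf (src, base, idx, mask, 4);

     The scale must be a constant 1, 2, 4 or 8; see the expander below.  */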
26586 /* MMX access to the vec_init patterns. */
26587 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
26588 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
26590 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
26591 V4HI_FTYPE_HI_HI_HI_HI,
26592 IX86_BUILTIN_VEC_INIT_V4HI);
26594 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
26595 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
26596 IX86_BUILTIN_VEC_INIT_V8QI);
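  /* Illustrative sketch: mmintrin.h is assumed to build __m64 values
     through these, e.g. _mm_set_pi32 expanding to something like

       (__m64) __builtin_ia32_vec_init_v2si (i0, i1);  */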
26598 /* Access to the vec_extract patterns. */
26599 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
26600 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
26601 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
26602 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
26603 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
26604 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
26605 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
26606 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
26607 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
26608 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
26610 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
26611 "__builtin_ia32_vec_ext_v4hi",
26612 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
26614 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
26615 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
26617 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
26618 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
26620 /* Access to the vec_set patterns. */
26621 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
26622 "__builtin_ia32_vec_set_v2di",
26623 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
26625 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
26626 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
26628 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
26629 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
26631 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
26632 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
26634 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
26635 "__builtin_ia32_vec_set_v4hi",
26636 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
26638 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
26639 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
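  /* Illustrative sketch of the element accessors (the selector must be
     an integer constant in range; see get_element_number below):

       float f = __builtin_ia32_vec_ext_v4sf (v, 0);      read lane 0
       w = __builtin_ia32_vec_set_v4si (w, x, 3);         write lane 3  */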
26641   /* Add FMA4 and XOP multi-arg builtin instructions.  */
26642 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
26647 ftype = (enum ix86_builtin_func_type) d->flag;
26648 def_builtin_const (d->mask, d->name, ftype, d->code);
26652 /* Internal method for ix86_init_builtins. */
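/* Illustrative usage sketch (assumed user-level spelling): a 64-bit
   ms_abi function walks its arguments with the __builtin_ms_va_*
   family through a __builtin_ms_va_list, mirroring ordinary stdarg:

     int __attribute__((ms_abi)) sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int s = 0;
       __builtin_ms_va_start (ap, n);
       while (n-- > 0)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }  */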
26654 static void
26655 ix86_init_builtins_va_builtins_abi (void)
26656 {
26657 tree ms_va_ref, sysv_va_ref;
26658 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
26659 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
26660 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
26661 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
26665 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
26666 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
26667 ms_va_ref = build_reference_type (ms_va_list_type_node);
26669 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
26672 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
26673 fnvoid_va_start_ms =
26674 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
26675 fnvoid_va_end_sysv =
26676 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
26677 fnvoid_va_start_sysv =
26678 build_varargs_function_type_list (void_type_node, sysv_va_ref,
26680 fnvoid_va_copy_ms =
26681 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
26683 fnvoid_va_copy_sysv =
26684 build_function_type_list (void_type_node, sysv_va_ref,
26685 sysv_va_ref, NULL_TREE);
26687 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
26688 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
26689 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
26690 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
26691 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
26692 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
26693 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
26694 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
26695 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
26696 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
26697 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
26698 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
26699 }
26701 static void
26702 ix86_init_builtin_types (void)
26703 {
26704 tree float128_type_node, float80_type_node;
26706 /* The __float80 type. */
26707 float80_type_node = long_double_type_node;
26708 if (TYPE_MODE (float80_type_node) != XFmode)
26710 /* The __float80 type. */
26711 float80_type_node = make_node (REAL_TYPE);
26713 TYPE_PRECISION (float80_type_node) = 80;
26714 layout_type (float80_type_node);
26716 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
26718 /* The __float128 type. */
26719 float128_type_node = make_node (REAL_TYPE);
26720 TYPE_PRECISION (float128_type_node) = 128;
26721 layout_type (float128_type_node);
26722 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
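  /* Illustrative sketch: once registered, both types are usable
     directly in user code; the literal suffixes below are the
     documented GCC spellings:

       __float80  x = 1.5w;     XFmode, 80-bit extended precision
       __float128 y = 1.5q;     TFmode, 128-bit  */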
26724 /* This macro is built by i386-builtin-types.awk. */
26725 DEFINE_BUILTIN_PRIMITIVE_TYPES;
26728 static void
26729 ix86_init_builtins (void)
26730 {
26731   tree t;
26733 ix86_init_builtin_types ();
26735 /* TFmode support builtins. */
26736 def_builtin_const (0, "__builtin_infq",
26737 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
26738 def_builtin_const (0, "__builtin_huge_valq",
26739 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
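  /* Illustrative sketch: these yield TFmode constants, e.g.

       __float128 inf = __builtin_infq ();  */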
26741   /* We will expand them to a normal call if SSE2 isn't available, since
26742      they are used by libgcc.  */
26743 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
26744 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
26745 BUILT_IN_MD, "__fabstf2", NULL_TREE);
26746 TREE_READONLY (t) = 1;
26747 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
26749 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
26750 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
26751 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
26752 TREE_READONLY (t) = 1;
26753 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
26755 ix86_init_mmx_sse_builtins ();
26758 ix86_init_builtins_va_builtins_abi ();
26760 #ifdef SUBTARGET_INIT_BUILTINS
26761 SUBTARGET_INIT_BUILTINS;
26765 /* Return the ix86 builtin for CODE. */
26768 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
26770 if (code >= IX86_BUILTIN_MAX)
26771 return error_mark_node;
26773 return ix86_builtins[code];
26776 /* Errors in the source file can cause expand_expr to return const0_rtx
26777 where we expect a vector. To avoid crashing, use one of the vector
26778 clear instructions. */
26780 safe_vector_operand (rtx x, enum machine_mode mode)
26782 if (x == const0_rtx)
26783     x = CONST0_RTX (mode);
26784   return x;
26785 }
26787 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
26790 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
26793 tree arg0 = CALL_EXPR_ARG (exp, 0);
26794 tree arg1 = CALL_EXPR_ARG (exp, 1);
26795 rtx op0 = expand_normal (arg0);
26796 rtx op1 = expand_normal (arg1);
26797 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26798 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26799 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
26801 if (VECTOR_MODE_P (mode0))
26802 op0 = safe_vector_operand (op0, mode0);
26803 if (VECTOR_MODE_P (mode1))
26804 op1 = safe_vector_operand (op1, mode1);
26806 if (optimize || !target
26807 || GET_MODE (target) != tmode
26808 || !insn_data[icode].operand[0].predicate (target, tmode))
26809 target = gen_reg_rtx (tmode);
26811 if (GET_MODE (op1) == SImode && mode1 == TImode)
26813 rtx x = gen_reg_rtx (V4SImode);
26814 emit_insn (gen_sse2_loadd (x, op1));
26815 op1 = gen_lowpart (TImode, x);
26818 if (!insn_data[icode].operand[1].predicate (op0, mode0))
26819 op0 = copy_to_mode_reg (mode0, op0);
26820 if (!insn_data[icode].operand[2].predicate (op1, mode1))
26821 op1 = copy_to_mode_reg (mode1, op1);
26823   pat = GEN_FCN (icode) (target, op0, op1);
26824   if (! pat)
26825     return 0;
26826   emit_insn (pat);
26827   return target;
26828 }
26832 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
26835 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
26836 enum ix86_builtin_func_type m_type,
26837 enum rtx_code sub_code)
26842 bool comparison_p = false;
26844 bool last_arg_constant = false;
26845 int num_memory = 0;
26848 enum machine_mode mode;
26851 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26855 case MULTI_ARG_4_DF2_DI_I:
26856 case MULTI_ARG_4_DF2_DI_I1:
26857 case MULTI_ARG_4_SF2_SI_I:
26858 case MULTI_ARG_4_SF2_SI_I1:
26860 last_arg_constant = true;
26863 case MULTI_ARG_3_SF:
26864 case MULTI_ARG_3_DF:
26865 case MULTI_ARG_3_SF2:
26866 case MULTI_ARG_3_DF2:
26867 case MULTI_ARG_3_DI:
26868 case MULTI_ARG_3_SI:
26869 case MULTI_ARG_3_SI_DI:
26870 case MULTI_ARG_3_HI:
26871 case MULTI_ARG_3_HI_SI:
26872 case MULTI_ARG_3_QI:
26873 case MULTI_ARG_3_DI2:
26874 case MULTI_ARG_3_SI2:
26875 case MULTI_ARG_3_HI2:
26876 case MULTI_ARG_3_QI2:
26880 case MULTI_ARG_2_SF:
26881 case MULTI_ARG_2_DF:
26882 case MULTI_ARG_2_DI:
26883 case MULTI_ARG_2_SI:
26884 case MULTI_ARG_2_HI:
26885 case MULTI_ARG_2_QI:
26889 case MULTI_ARG_2_DI_IMM:
26890 case MULTI_ARG_2_SI_IMM:
26891 case MULTI_ARG_2_HI_IMM:
26892 case MULTI_ARG_2_QI_IMM:
26894 last_arg_constant = true;
26897 case MULTI_ARG_1_SF:
26898 case MULTI_ARG_1_DF:
26899 case MULTI_ARG_1_SF2:
26900 case MULTI_ARG_1_DF2:
26901 case MULTI_ARG_1_DI:
26902 case MULTI_ARG_1_SI:
26903 case MULTI_ARG_1_HI:
26904 case MULTI_ARG_1_QI:
26905 case MULTI_ARG_1_SI_DI:
26906 case MULTI_ARG_1_HI_DI:
26907 case MULTI_ARG_1_HI_SI:
26908 case MULTI_ARG_1_QI_DI:
26909 case MULTI_ARG_1_QI_SI:
26910 case MULTI_ARG_1_QI_HI:
26914 case MULTI_ARG_2_DI_CMP:
26915 case MULTI_ARG_2_SI_CMP:
26916 case MULTI_ARG_2_HI_CMP:
26917 case MULTI_ARG_2_QI_CMP:
26919 comparison_p = true;
26922 case MULTI_ARG_2_SF_TF:
26923 case MULTI_ARG_2_DF_TF:
26924 case MULTI_ARG_2_DI_TF:
26925 case MULTI_ARG_2_SI_TF:
26926 case MULTI_ARG_2_HI_TF:
26927 case MULTI_ARG_2_QI_TF:
26933 gcc_unreachable ();
26936 if (optimize || !target
26937 || GET_MODE (target) != tmode
26938 || !insn_data[icode].operand[0].predicate (target, tmode))
26939 target = gen_reg_rtx (tmode);
26941 gcc_assert (nargs <= 4);
26943 for (i = 0; i < nargs; i++)
26945 tree arg = CALL_EXPR_ARG (exp, i);
26946 rtx op = expand_normal (arg);
26947 int adjust = (comparison_p) ? 1 : 0;
26948 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
26950 if (last_arg_constant && i == nargs - 1)
26952 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
26954 enum insn_code new_icode = icode;
26957 case CODE_FOR_xop_vpermil2v2df3:
26958 case CODE_FOR_xop_vpermil2v4sf3:
26959 case CODE_FOR_xop_vpermil2v4df3:
26960 case CODE_FOR_xop_vpermil2v8sf3:
26961 error ("the last argument must be a 2-bit immediate");
26962 return gen_reg_rtx (tmode);
26963 case CODE_FOR_xop_rotlv2di3:
26964 new_icode = CODE_FOR_rotlv2di3;
26966 case CODE_FOR_xop_rotlv4si3:
26967 new_icode = CODE_FOR_rotlv4si3;
26969 case CODE_FOR_xop_rotlv8hi3:
26970 new_icode = CODE_FOR_rotlv8hi3;
26972 case CODE_FOR_xop_rotlv16qi3:
26973 new_icode = CODE_FOR_rotlv16qi3;
26975 if (CONST_INT_P (op))
26977 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
26978 op = GEN_INT (INTVAL (op) & mask);
26979 gcc_checking_assert
26980 (insn_data[icode].operand[i + 1].predicate (op, mode));
26984 gcc_checking_assert
26986 && insn_data[new_icode].operand[0].mode == tmode
26987 && insn_data[new_icode].operand[1].mode == tmode
26988 && insn_data[new_icode].operand[2].mode == mode
26989 && insn_data[new_icode].operand[0].predicate
26990 == insn_data[icode].operand[0].predicate
26991 && insn_data[new_icode].operand[1].predicate
26992 == insn_data[icode].operand[1].predicate);
26998 gcc_unreachable ();
27005 if (VECTOR_MODE_P (mode))
27006 op = safe_vector_operand (op, mode);
27008       /* If we aren't optimizing, only allow one memory operand to be
27009          generated.  */
27010       if (memory_operand (op, mode))
27011         num_memory++;
27013 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
27016 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
27018 op = force_reg (mode, op);
27022 args[i].mode = mode;
27028 pat = GEN_FCN (icode) (target, args[0].op);
27033 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
27034 GEN_INT ((int)sub_code));
27035 else if (! comparison_p)
27036 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
27039 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
27043 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
27048 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
27052 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
27056 gcc_unreachable ();
27066 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
27067 insns with vec_merge. */
27070 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
27074 tree arg0 = CALL_EXPR_ARG (exp, 0);
27075 rtx op1, op0 = expand_normal (arg0);
27076 enum machine_mode tmode = insn_data[icode].operand[0].mode;
27077 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
27079 if (optimize || !target
27080 || GET_MODE (target) != tmode
27081 || !insn_data[icode].operand[0].predicate (target, tmode))
27082 target = gen_reg_rtx (tmode);
27084 if (VECTOR_MODE_P (mode0))
27085 op0 = safe_vector_operand (op0, mode0);
27087 if ((optimize && !register_operand (op0, mode0))
27088 || !insn_data[icode].operand[1].predicate (op0, mode0))
27089 op0 = copy_to_mode_reg (mode0, op0);
27092 if (!insn_data[icode].operand[2].predicate (op1, mode0))
27093 op1 = copy_to_mode_reg (mode0, op1);
27095 pat = GEN_FCN (icode) (target, op0, op1);
27102 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
27105 ix86_expand_sse_compare (const struct builtin_description *d,
27106 tree exp, rtx target, bool swap)
27109 tree arg0 = CALL_EXPR_ARG (exp, 0);
27110 tree arg1 = CALL_EXPR_ARG (exp, 1);
27111 rtx op0 = expand_normal (arg0);
27112 rtx op1 = expand_normal (arg1);
27114 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
27115 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
27116 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
27117 enum rtx_code comparison = d->comparison;
27119 if (VECTOR_MODE_P (mode0))
27120 op0 = safe_vector_operand (op0, mode0);
27121 if (VECTOR_MODE_P (mode1))
27122 op1 = safe_vector_operand (op1, mode1);
27124   /* Swap operands if we have a comparison that isn't available in
27125      hardware.  */
27126   if (swap)
27127     {
27128       rtx tmp = gen_reg_rtx (mode1);
27129       emit_move_insn (tmp, op1);
27130       op1 = op0;
27131       op0 = tmp;
27132     }
27134 if (optimize || !target
27135 || GET_MODE (target) != tmode
27136 || !insn_data[d->icode].operand[0].predicate (target, tmode))
27137 target = gen_reg_rtx (tmode);
27139 if ((optimize && !register_operand (op0, mode0))
27140 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
27141 op0 = copy_to_mode_reg (mode0, op0);
27142 if ((optimize && !register_operand (op1, mode1))
27143 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
27144 op1 = copy_to_mode_reg (mode1, op1);
27146 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
27147 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
27154 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
27157 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
27161 tree arg0 = CALL_EXPR_ARG (exp, 0);
27162 tree arg1 = CALL_EXPR_ARG (exp, 1);
27163 rtx op0 = expand_normal (arg0);
27164 rtx op1 = expand_normal (arg1);
27165 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
27166 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
27167 enum rtx_code comparison = d->comparison;
27169 if (VECTOR_MODE_P (mode0))
27170 op0 = safe_vector_operand (op0, mode0);
27171 if (VECTOR_MODE_P (mode1))
27172 op1 = safe_vector_operand (op1, mode1);
27174   /* Swap operands if we have a comparison that isn't available in
27175      hardware.  */
27176   if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
27177     {
27178       rtx tmp = op1;
27179       op1 = op0;
27180       op0 = tmp;
27181     }
27183 target = gen_reg_rtx (SImode);
27184 emit_move_insn (target, const0_rtx);
27185 target = gen_rtx_SUBREG (QImode, target, 0);
27187 if ((optimize && !register_operand (op0, mode0))
27188 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
27189 op0 = copy_to_mode_reg (mode0, op0);
27190 if ((optimize && !register_operand (op1, mode1))
27191 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
27192 op1 = copy_to_mode_reg (mode1, op1);
27194   pat = GEN_FCN (d->icode) (op0, op1);
27195   if (! pat)
27196     return 0;
27197   emit_insn (pat);
27198   emit_insn (gen_rtx_SET (VOIDmode,
27199                           gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27200                           gen_rtx_fmt_ee (comparison, QImode,
27201                                           SET_DEST (pat),
27202                                           const0_rtx)));
27204   return SUBREG_REG (target);
27205 }
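/* Illustrative usage sketch: a comi builtin compares the bottom
   elements of its vector operands and yields a scalar int, e.g.
   (assumed xmmintrin.h mapping)

     int eq = __builtin_ia32_comieq (a, b);    1 iff a[0] == b[0]  */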
27207 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
27210 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
27214 tree arg0 = CALL_EXPR_ARG (exp, 0);
27215 rtx op1, op0 = expand_normal (arg0);
27216 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
27217 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
27219 if (optimize || target == 0
27220 || GET_MODE (target) != tmode
27221 || !insn_data[d->icode].operand[0].predicate (target, tmode))
27222 target = gen_reg_rtx (tmode);
27224 if (VECTOR_MODE_P (mode0))
27225 op0 = safe_vector_operand (op0, mode0);
27227 if ((optimize && !register_operand (op0, mode0))
27228 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
27229 op0 = copy_to_mode_reg (mode0, op0);
27231 op1 = GEN_INT (d->comparison);
27233   pat = GEN_FCN (d->icode) (target, op0, op1);
27234   if (! pat)
27235     return 0;
27236   emit_insn (pat);
27237   return target;
27238 }
27240 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
27243 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
27247 tree arg0 = CALL_EXPR_ARG (exp, 0);
27248 tree arg1 = CALL_EXPR_ARG (exp, 1);
27249 rtx op0 = expand_normal (arg0);
27250 rtx op1 = expand_normal (arg1);
27251 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
27252 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
27253 enum rtx_code comparison = d->comparison;
27255 if (VECTOR_MODE_P (mode0))
27256 op0 = safe_vector_operand (op0, mode0);
27257 if (VECTOR_MODE_P (mode1))
27258 op1 = safe_vector_operand (op1, mode1);
27260 target = gen_reg_rtx (SImode);
27261 emit_move_insn (target, const0_rtx);
27262 target = gen_rtx_SUBREG (QImode, target, 0);
27264 if ((optimize && !register_operand (op0, mode0))
27265 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
27266 op0 = copy_to_mode_reg (mode0, op0);
27267 if ((optimize && !register_operand (op1, mode1))
27268 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
27269 op1 = copy_to_mode_reg (mode1, op1);
27271   pat = GEN_FCN (d->icode) (op0, op1);
27272   if (! pat)
27273     return 0;
27274   emit_insn (pat);
27275   emit_insn (gen_rtx_SET (VOIDmode,
27276                           gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27277                           gen_rtx_fmt_ee (comparison, QImode,
27278                                           gen_rtx_REG (CCmode, FLAGS_REG),
27279                                           const0_rtx)));
27281   return SUBREG_REG (target);
27282 }
27284 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
27287 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
27288 tree exp, rtx target)
27291 tree arg0 = CALL_EXPR_ARG (exp, 0);
27292 tree arg1 = CALL_EXPR_ARG (exp, 1);
27293 tree arg2 = CALL_EXPR_ARG (exp, 2);
27294 tree arg3 = CALL_EXPR_ARG (exp, 3);
27295 tree arg4 = CALL_EXPR_ARG (exp, 4);
27296 rtx scratch0, scratch1;
27297 rtx op0 = expand_normal (arg0);
27298 rtx op1 = expand_normal (arg1);
27299 rtx op2 = expand_normal (arg2);
27300 rtx op3 = expand_normal (arg3);
27301 rtx op4 = expand_normal (arg4);
27302 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
27304 tmode0 = insn_data[d->icode].operand[0].mode;
27305 tmode1 = insn_data[d->icode].operand[1].mode;
27306 modev2 = insn_data[d->icode].operand[2].mode;
27307 modei3 = insn_data[d->icode].operand[3].mode;
27308 modev4 = insn_data[d->icode].operand[4].mode;
27309 modei5 = insn_data[d->icode].operand[5].mode;
27310 modeimm = insn_data[d->icode].operand[6].mode;
27312 if (VECTOR_MODE_P (modev2))
27313 op0 = safe_vector_operand (op0, modev2);
27314 if (VECTOR_MODE_P (modev4))
27315 op2 = safe_vector_operand (op2, modev4);
27317 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
27318 op0 = copy_to_mode_reg (modev2, op0);
27319 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
27320 op1 = copy_to_mode_reg (modei3, op1);
27321 if ((optimize && !register_operand (op2, modev4))
27322 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
27323 op2 = copy_to_mode_reg (modev4, op2);
27324 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
27325 op3 = copy_to_mode_reg (modei5, op3);
27327 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
27329 error ("the fifth argument must be an 8-bit immediate");
27333 if (d->code == IX86_BUILTIN_PCMPESTRI128)
27335 if (optimize || !target
27336 || GET_MODE (target) != tmode0
27337 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
27338 target = gen_reg_rtx (tmode0);
27340 scratch1 = gen_reg_rtx (tmode1);
27342 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
27344 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
27346 if (optimize || !target
27347 || GET_MODE (target) != tmode1
27348 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
27349 target = gen_reg_rtx (tmode1);
27351 scratch0 = gen_reg_rtx (tmode0);
27353 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
27357 gcc_assert (d->flag);
27359 scratch0 = gen_reg_rtx (tmode0);
27360 scratch1 = gen_reg_rtx (tmode1);
27362 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
27372 target = gen_reg_rtx (SImode);
27373 emit_move_insn (target, const0_rtx);
27374 target = gen_rtx_SUBREG (QImode, target, 0);
27377 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27378 gen_rtx_fmt_ee (EQ, QImode,
27379                        gen_rtx_REG ((enum machine_mode) d->flag,
27380                                     FLAGS_REG),
27381                        const0_rtx)));
27382       return SUBREG_REG (target);
27383     }
27384   else
27385     return target;
27386 }
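/* Illustrative sketch (assumed smmintrin.h mapping): _mm_cmpestri
   expands to __builtin_ia32_pcmpestri128 (a, la, b, lb, imm8), where
   imm8 must be a compile-time constant selecting the comparison mode;
   a non-constant value triggers the "fifth argument" error above.  */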
27389 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
27392 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
27393 tree exp, rtx target)
27396 tree arg0 = CALL_EXPR_ARG (exp, 0);
27397 tree arg1 = CALL_EXPR_ARG (exp, 1);
27398 tree arg2 = CALL_EXPR_ARG (exp, 2);
27399 rtx scratch0, scratch1;
27400 rtx op0 = expand_normal (arg0);
27401 rtx op1 = expand_normal (arg1);
27402 rtx op2 = expand_normal (arg2);
27403 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
27405 tmode0 = insn_data[d->icode].operand[0].mode;
27406 tmode1 = insn_data[d->icode].operand[1].mode;
27407 modev2 = insn_data[d->icode].operand[2].mode;
27408 modev3 = insn_data[d->icode].operand[3].mode;
27409 modeimm = insn_data[d->icode].operand[4].mode;
27411 if (VECTOR_MODE_P (modev2))
27412 op0 = safe_vector_operand (op0, modev2);
27413 if (VECTOR_MODE_P (modev3))
27414 op1 = safe_vector_operand (op1, modev3);
27416 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
27417 op0 = copy_to_mode_reg (modev2, op0);
27418 if ((optimize && !register_operand (op1, modev3))
27419 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
27420 op1 = copy_to_mode_reg (modev3, op1);
27422 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
27424 error ("the third argument must be an 8-bit immediate");
27428 if (d->code == IX86_BUILTIN_PCMPISTRI128)
27430 if (optimize || !target
27431 || GET_MODE (target) != tmode0
27432 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
27433 target = gen_reg_rtx (tmode0);
27435 scratch1 = gen_reg_rtx (tmode1);
27437 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
27439 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
27441 if (optimize || !target
27442 || GET_MODE (target) != tmode1
27443 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
27444 target = gen_reg_rtx (tmode1);
27446 scratch0 = gen_reg_rtx (tmode0);
27448 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
27452 gcc_assert (d->flag);
27454 scratch0 = gen_reg_rtx (tmode0);
27455 scratch1 = gen_reg_rtx (tmode1);
27457 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
27467 target = gen_reg_rtx (SImode);
27468 emit_move_insn (target, const0_rtx);
27469 target = gen_rtx_SUBREG (QImode, target, 0);
27472 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27473 gen_rtx_fmt_ee (EQ, QImode,
27474                        gen_rtx_REG ((enum machine_mode) d->flag,
27475                                     FLAGS_REG),
27476                        const0_rtx)));
27477       return SUBREG_REG (target);
27478     }
27479   else
27480     return target;
27481 }
27483 /* Subroutine of ix86_expand_builtin to take care of insns with
27484    a variable number of operands.  */
27487 ix86_expand_args_builtin (const struct builtin_description *d,
27488 tree exp, rtx target)
27490 rtx pat, real_target;
27491 unsigned int i, nargs;
27492 unsigned int nargs_constant = 0;
27493 int num_memory = 0;
27497 enum machine_mode mode;
27499 bool last_arg_count = false;
27500 enum insn_code icode = d->icode;
27501 const struct insn_data_d *insn_p = &insn_data[icode];
27502 enum machine_mode tmode = insn_p->operand[0].mode;
27503 enum machine_mode rmode = VOIDmode;
27505 enum rtx_code comparison = d->comparison;
27507 switch ((enum ix86_builtin_func_type) d->flag)
27509 case V2DF_FTYPE_V2DF_ROUND:
27510 case V4DF_FTYPE_V4DF_ROUND:
27511 case V4SF_FTYPE_V4SF_ROUND:
27512 case V8SF_FTYPE_V8SF_ROUND:
27513 return ix86_expand_sse_round (d, exp, target);
27514 case INT_FTYPE_V8SF_V8SF_PTEST:
27515 case INT_FTYPE_V4DI_V4DI_PTEST:
27516 case INT_FTYPE_V4DF_V4DF_PTEST:
27517 case INT_FTYPE_V4SF_V4SF_PTEST:
27518 case INT_FTYPE_V2DI_V2DI_PTEST:
27519 case INT_FTYPE_V2DF_V2DF_PTEST:
27520 return ix86_expand_sse_ptest (d, exp, target);
27521 case FLOAT128_FTYPE_FLOAT128:
27522 case FLOAT_FTYPE_FLOAT:
27523 case INT_FTYPE_INT:
27524 case UINT64_FTYPE_INT:
27525 case UINT16_FTYPE_UINT16:
27526 case INT64_FTYPE_INT64:
27527 case INT64_FTYPE_V4SF:
27528 case INT64_FTYPE_V2DF:
27529 case INT_FTYPE_V16QI:
27530 case INT_FTYPE_V8QI:
27531 case INT_FTYPE_V8SF:
27532 case INT_FTYPE_V4DF:
27533 case INT_FTYPE_V4SF:
27534 case INT_FTYPE_V2DF:
27535 case INT_FTYPE_V32QI:
27536 case V16QI_FTYPE_V16QI:
27537 case V8SI_FTYPE_V8SF:
27538 case V8SI_FTYPE_V4SI:
27539 case V8HI_FTYPE_V8HI:
27540 case V8HI_FTYPE_V16QI:
27541 case V8QI_FTYPE_V8QI:
27542 case V8SF_FTYPE_V8SF:
27543 case V8SF_FTYPE_V8SI:
27544 case V8SF_FTYPE_V4SF:
27545 case V8SF_FTYPE_V8HI:
27546 case V4SI_FTYPE_V4SI:
27547 case V4SI_FTYPE_V16QI:
27548 case V4SI_FTYPE_V4SF:
27549 case V4SI_FTYPE_V8SI:
27550 case V4SI_FTYPE_V8HI:
27551 case V4SI_FTYPE_V4DF:
27552 case V4SI_FTYPE_V2DF:
27553 case V4HI_FTYPE_V4HI:
27554 case V4DF_FTYPE_V4DF:
27555 case V4DF_FTYPE_V4SI:
27556 case V4DF_FTYPE_V4SF:
27557 case V4DF_FTYPE_V2DF:
27558 case V4SF_FTYPE_V4SF:
27559 case V4SF_FTYPE_V4SI:
27560 case V4SF_FTYPE_V8SF:
27561 case V4SF_FTYPE_V4DF:
27562 case V4SF_FTYPE_V8HI:
27563 case V4SF_FTYPE_V2DF:
27564 case V2DI_FTYPE_V2DI:
27565 case V2DI_FTYPE_V16QI:
27566 case V2DI_FTYPE_V8HI:
27567 case V2DI_FTYPE_V4SI:
27568 case V2DF_FTYPE_V2DF:
27569 case V2DF_FTYPE_V4SI:
27570 case V2DF_FTYPE_V4DF:
27571 case V2DF_FTYPE_V4SF:
27572 case V2DF_FTYPE_V2SI:
27573 case V2SI_FTYPE_V2SI:
27574 case V2SI_FTYPE_V4SF:
27575 case V2SI_FTYPE_V2SF:
27576 case V2SI_FTYPE_V2DF:
27577 case V2SF_FTYPE_V2SF:
27578 case V2SF_FTYPE_V2SI:
27579 case V32QI_FTYPE_V32QI:
27580 case V32QI_FTYPE_V16QI:
27581 case V16HI_FTYPE_V16HI:
27582 case V16HI_FTYPE_V8HI:
27583 case V8SI_FTYPE_V8SI:
27584 case V16HI_FTYPE_V16QI:
27585 case V8SI_FTYPE_V16QI:
27586 case V4DI_FTYPE_V16QI:
27587 case V8SI_FTYPE_V8HI:
27588 case V4DI_FTYPE_V8HI:
27589 case V4DI_FTYPE_V4SI:
27590 case V4DI_FTYPE_V2DI:
27593 case V4SF_FTYPE_V4SF_VEC_MERGE:
27594 case V2DF_FTYPE_V2DF_VEC_MERGE:
27595 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
27596 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
27597 case V16QI_FTYPE_V16QI_V16QI:
27598 case V16QI_FTYPE_V8HI_V8HI:
27599 case V8QI_FTYPE_V8QI_V8QI:
27600 case V8QI_FTYPE_V4HI_V4HI:
27601 case V8HI_FTYPE_V8HI_V8HI:
27602 case V8HI_FTYPE_V16QI_V16QI:
27603 case V8HI_FTYPE_V4SI_V4SI:
27604 case V8SF_FTYPE_V8SF_V8SF:
27605 case V8SF_FTYPE_V8SF_V8SI:
27606 case V4SI_FTYPE_V4SI_V4SI:
27607 case V4SI_FTYPE_V8HI_V8HI:
27608 case V4SI_FTYPE_V4SF_V4SF:
27609 case V4SI_FTYPE_V2DF_V2DF:
27610 case V4HI_FTYPE_V4HI_V4HI:
27611 case V4HI_FTYPE_V8QI_V8QI:
27612 case V4HI_FTYPE_V2SI_V2SI:
27613 case V4DF_FTYPE_V4DF_V4DF:
27614 case V4DF_FTYPE_V4DF_V4DI:
27615 case V4SF_FTYPE_V4SF_V4SF:
27616 case V4SF_FTYPE_V4SF_V4SI:
27617 case V4SF_FTYPE_V4SF_V2SI:
27618 case V4SF_FTYPE_V4SF_V2DF:
27619 case V4SF_FTYPE_V4SF_DI:
27620 case V4SF_FTYPE_V4SF_SI:
27621 case V2DI_FTYPE_V2DI_V2DI:
27622 case V2DI_FTYPE_V16QI_V16QI:
27623 case V2DI_FTYPE_V4SI_V4SI:
27624 case V2DI_FTYPE_V2DI_V16QI:
27625 case V2DI_FTYPE_V2DF_V2DF:
27626 case V2SI_FTYPE_V2SI_V2SI:
27627 case V2SI_FTYPE_V4HI_V4HI:
27628 case V2SI_FTYPE_V2SF_V2SF:
27629 case V2DF_FTYPE_V2DF_V2DF:
27630 case V2DF_FTYPE_V2DF_V4SF:
27631 case V2DF_FTYPE_V2DF_V2DI:
27632 case V2DF_FTYPE_V2DF_DI:
27633 case V2DF_FTYPE_V2DF_SI:
27634 case V2SF_FTYPE_V2SF_V2SF:
27635 case V1DI_FTYPE_V1DI_V1DI:
27636 case V1DI_FTYPE_V8QI_V8QI:
27637 case V1DI_FTYPE_V2SI_V2SI:
27638 case V32QI_FTYPE_V16HI_V16HI:
27639 case V16HI_FTYPE_V8SI_V8SI:
27640 case V32QI_FTYPE_V32QI_V32QI:
27641 case V16HI_FTYPE_V32QI_V32QI:
27642 case V16HI_FTYPE_V16HI_V16HI:
27643 case V8SI_FTYPE_V8SI_V8SI:
27644 case V8SI_FTYPE_V16HI_V16HI:
27645 case V4DI_FTYPE_V4DI_V4DI:
27646 case V4DI_FTYPE_V8SI_V8SI:
27647 if (comparison == UNKNOWN)
27648 return ix86_expand_binop_builtin (icode, exp, target);
27651 case V4SF_FTYPE_V4SF_V4SF_SWAP:
27652 case V2DF_FTYPE_V2DF_V2DF_SWAP:
27653 gcc_assert (comparison != UNKNOWN);
27657 case V16HI_FTYPE_V16HI_V8HI_COUNT:
27658 case V16HI_FTYPE_V16HI_SI_COUNT:
27659 case V8SI_FTYPE_V8SI_V4SI_COUNT:
27660 case V8SI_FTYPE_V8SI_SI_COUNT:
27661 case V4DI_FTYPE_V4DI_V2DI_COUNT:
27662 case V4DI_FTYPE_V4DI_INT_COUNT:
27663 case V8HI_FTYPE_V8HI_V8HI_COUNT:
27664 case V8HI_FTYPE_V8HI_SI_COUNT:
27665 case V4SI_FTYPE_V4SI_V4SI_COUNT:
27666 case V4SI_FTYPE_V4SI_SI_COUNT:
27667 case V4HI_FTYPE_V4HI_V4HI_COUNT:
27668 case V4HI_FTYPE_V4HI_SI_COUNT:
27669 case V2DI_FTYPE_V2DI_V2DI_COUNT:
27670 case V2DI_FTYPE_V2DI_SI_COUNT:
27671 case V2SI_FTYPE_V2SI_V2SI_COUNT:
27672 case V2SI_FTYPE_V2SI_SI_COUNT:
27673 case V1DI_FTYPE_V1DI_V1DI_COUNT:
27674 case V1DI_FTYPE_V1DI_SI_COUNT:
27676 last_arg_count = true;
27678 case UINT64_FTYPE_UINT64_UINT64:
27679 case UINT_FTYPE_UINT_UINT:
27680 case UINT_FTYPE_UINT_USHORT:
27681 case UINT_FTYPE_UINT_UCHAR:
27682 case UINT16_FTYPE_UINT16_INT:
27683 case UINT8_FTYPE_UINT8_INT:
27686 case V2DI_FTYPE_V2DI_INT_CONVERT:
27689 nargs_constant = 1;
27691 case V8HI_FTYPE_V8HI_INT:
27692 case V8HI_FTYPE_V8SF_INT:
27693 case V8HI_FTYPE_V4SF_INT:
27694 case V8SF_FTYPE_V8SF_INT:
27695 case V4SI_FTYPE_V4SI_INT:
27696 case V4SI_FTYPE_V8SI_INT:
27697 case V4HI_FTYPE_V4HI_INT:
27698 case V4DF_FTYPE_V4DF_INT:
27699 case V4SF_FTYPE_V4SF_INT:
27700 case V4SF_FTYPE_V8SF_INT:
27701 case V2DI_FTYPE_V2DI_INT:
27702 case V2DF_FTYPE_V2DF_INT:
27703 case V2DF_FTYPE_V4DF_INT:
27704 case V16HI_FTYPE_V16HI_INT:
27705 case V8SI_FTYPE_V8SI_INT:
27706 case V4DI_FTYPE_V4DI_INT:
27707 case V2DI_FTYPE_V4DI_INT:
27709 nargs_constant = 1;
27711 case V16QI_FTYPE_V16QI_V16QI_V16QI:
27712 case V8SF_FTYPE_V8SF_V8SF_V8SF:
27713 case V4DF_FTYPE_V4DF_V4DF_V4DF:
27714 case V4SF_FTYPE_V4SF_V4SF_V4SF:
27715 case V2DF_FTYPE_V2DF_V2DF_V2DF:
27716 case V32QI_FTYPE_V32QI_V32QI_V32QI:
27719 case V32QI_FTYPE_V32QI_V32QI_INT:
27720 case V16HI_FTYPE_V16HI_V16HI_INT:
27721 case V16QI_FTYPE_V16QI_V16QI_INT:
27722 case V4DI_FTYPE_V4DI_V4DI_INT:
27723 case V8HI_FTYPE_V8HI_V8HI_INT:
27724 case V8SI_FTYPE_V8SI_V8SI_INT:
27725 case V8SI_FTYPE_V8SI_V4SI_INT:
27726 case V8SF_FTYPE_V8SF_V8SF_INT:
27727 case V8SF_FTYPE_V8SF_V4SF_INT:
27728 case V4SI_FTYPE_V4SI_V4SI_INT:
27729 case V4DF_FTYPE_V4DF_V4DF_INT:
27730 case V4DF_FTYPE_V4DF_V2DF_INT:
27731 case V4SF_FTYPE_V4SF_V4SF_INT:
27732 case V2DI_FTYPE_V2DI_V2DI_INT:
27733 case V4DI_FTYPE_V4DI_V2DI_INT:
27734 case V2DF_FTYPE_V2DF_V2DF_INT:
27736 nargs_constant = 1;
27738 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
27741 nargs_constant = 1;
27743 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
27746 nargs_constant = 1;
27748 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
27751 nargs_constant = 1;
27753 case V2DI_FTYPE_V2DI_UINT_UINT:
27755 nargs_constant = 2;
27757 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
27758 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
27759 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
27760 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
27762 nargs_constant = 1;
27764 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
27766 nargs_constant = 2;
27769 gcc_unreachable ();
27772 gcc_assert (nargs <= ARRAY_SIZE (args));
27774 if (comparison != UNKNOWN)
27776 gcc_assert (nargs == 2);
27777 return ix86_expand_sse_compare (d, exp, target, swap);
27780 if (rmode == VOIDmode || rmode == tmode)
27784 || GET_MODE (target) != tmode
27785 || !insn_p->operand[0].predicate (target, tmode))
27786 target = gen_reg_rtx (tmode);
27787 real_target = target;
27791 target = gen_reg_rtx (rmode);
27792 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
27795 for (i = 0; i < nargs; i++)
27797 tree arg = CALL_EXPR_ARG (exp, i);
27798 rtx op = expand_normal (arg);
27799 enum machine_mode mode = insn_p->operand[i + 1].mode;
27800 bool match = insn_p->operand[i + 1].predicate (op, mode);
27802 if (last_arg_count && (i + 1) == nargs)
27804       /* SIMD shift insns take either an 8-bit immediate or a
27805          register as the count, but the builtin functions take an int.
27806          If the count doesn't match, we put it in a register.  */
27809 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
27810 if (!insn_p->operand[i + 1].predicate (op, mode))
27811 op = copy_to_reg (op);
27814 else if ((nargs - i) <= nargs_constant)
27819 case CODE_FOR_avx2_inserti128:
27820 case CODE_FOR_avx2_extracti128:
27821 error ("the last argument must be an 1-bit immediate");
27824 case CODE_FOR_sse4_1_roundpd:
27825 case CODE_FOR_sse4_1_roundps:
27826 case CODE_FOR_sse4_1_roundsd:
27827 case CODE_FOR_sse4_1_roundss:
27828 case CODE_FOR_sse4_1_blendps:
27829 case CODE_FOR_avx_blendpd256:
27830 case CODE_FOR_avx_vpermilv4df:
27831 case CODE_FOR_avx_roundpd256:
27832 case CODE_FOR_avx_roundps256:
27833 error ("the last argument must be a 4-bit immediate");
27836 case CODE_FOR_sse4_1_blendpd:
27837 case CODE_FOR_avx_vpermilv2df:
27838 case CODE_FOR_xop_vpermil2v2df3:
27839 case CODE_FOR_xop_vpermil2v4sf3:
27840 case CODE_FOR_xop_vpermil2v4df3:
27841 case CODE_FOR_xop_vpermil2v8sf3:
27842 error ("the last argument must be a 2-bit immediate");
27845 case CODE_FOR_avx_vextractf128v4df:
27846 case CODE_FOR_avx_vextractf128v8sf:
27847 case CODE_FOR_avx_vextractf128v8si:
27848 case CODE_FOR_avx_vinsertf128v4df:
27849 case CODE_FOR_avx_vinsertf128v8sf:
27850 case CODE_FOR_avx_vinsertf128v8si:
27851 error ("the last argument must be a 1-bit immediate");
27854 case CODE_FOR_avx_vmcmpv2df3:
27855 case CODE_FOR_avx_vmcmpv4sf3:
27856 case CODE_FOR_avx_cmpv2df3:
27857 case CODE_FOR_avx_cmpv4sf3:
27858 case CODE_FOR_avx_cmpv4df3:
27859 case CODE_FOR_avx_cmpv8sf3:
27860 error ("the last argument must be a 5-bit immediate");
27864 switch (nargs_constant)
27867 if ((nargs - i) == nargs_constant)
27869 error ("the next to last argument must be an 8-bit immediate");
27873 error ("the last argument must be an 8-bit immediate");
27876 gcc_unreachable ();
27883 if (VECTOR_MODE_P (mode))
27884 op = safe_vector_operand (op, mode);
27886       /* If we aren't optimizing, only allow one memory operand to
27887          be generated.  */
27888       if (memory_operand (op, mode))
27889         num_memory++;
27891 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
27893 if (optimize || !match || num_memory > 1)
27894 op = copy_to_mode_reg (mode, op);
27898 op = copy_to_reg (op);
27899 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
27904 args[i].mode = mode;
27910 pat = GEN_FCN (icode) (real_target, args[0].op);
27913 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
27916 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27920 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27921 args[2].op, args[3].op);
27924 gcc_unreachable ();
27934 /* Subroutine of ix86_expand_builtin to take care of special insns
27935    with a variable number of operands.  */
27938 ix86_expand_special_args_builtin (const struct builtin_description *d,
27939 tree exp, rtx target)
27943 unsigned int i, nargs, arg_adjust, memory;
27947 enum machine_mode mode;
27949 enum insn_code icode = d->icode;
27950 bool last_arg_constant = false;
27951 const struct insn_data_d *insn_p = &insn_data[icode];
27952 enum machine_mode tmode = insn_p->operand[0].mode;
27953 enum { load, store } klass;
27955 switch ((enum ix86_builtin_func_type) d->flag)
27957 case VOID_FTYPE_VOID:
27958 if (icode == CODE_FOR_avx_vzeroupper)
27959 target = GEN_INT (vzeroupper_intrinsic);
27960 emit_insn (GEN_FCN (icode) (target));
27962 case VOID_FTYPE_UINT64:
27963 case VOID_FTYPE_UNSIGNED:
27969 case UINT64_FTYPE_VOID:
27970 case UNSIGNED_FTYPE_VOID:
27975 case UINT64_FTYPE_PUNSIGNED:
27976 case V2DI_FTYPE_PV2DI:
27977 case V4DI_FTYPE_PV4DI:
27978 case V32QI_FTYPE_PCCHAR:
27979 case V16QI_FTYPE_PCCHAR:
27980 case V8SF_FTYPE_PCV4SF:
27981 case V8SF_FTYPE_PCFLOAT:
27982 case V4SF_FTYPE_PCFLOAT:
27983 case V4DF_FTYPE_PCV2DF:
27984 case V4DF_FTYPE_PCDOUBLE:
27985 case V2DF_FTYPE_PCDOUBLE:
27986 case VOID_FTYPE_PVOID:
27991 case VOID_FTYPE_PV2SF_V4SF:
27992 case VOID_FTYPE_PV4DI_V4DI:
27993 case VOID_FTYPE_PV2DI_V2DI:
27994 case VOID_FTYPE_PCHAR_V32QI:
27995 case VOID_FTYPE_PCHAR_V16QI:
27996 case VOID_FTYPE_PFLOAT_V8SF:
27997 case VOID_FTYPE_PFLOAT_V4SF:
27998 case VOID_FTYPE_PDOUBLE_V4DF:
27999 case VOID_FTYPE_PDOUBLE_V2DF:
28000 case VOID_FTYPE_PULONGLONG_ULONGLONG:
28001 case VOID_FTYPE_PINT_INT:
28004 /* Reserve memory operand for target. */
28005 memory = ARRAY_SIZE (args);
28007 case V4SF_FTYPE_V4SF_PCV2SF:
28008 case V2DF_FTYPE_V2DF_PCDOUBLE:
28013 case V8SF_FTYPE_PCV8SF_V8SI:
28014 case V4DF_FTYPE_PCV4DF_V4DI:
28015 case V4SF_FTYPE_PCV4SF_V4SI:
28016 case V2DF_FTYPE_PCV2DF_V2DI:
28017 case V8SI_FTYPE_PCV8SI_V8SI:
28018 case V4DI_FTYPE_PCV4DI_V4DI:
28019 case V4SI_FTYPE_PCV4SI_V4SI:
28020 case V2DI_FTYPE_PCV2DI_V2DI:
28025 case VOID_FTYPE_PV8SF_V8SI_V8SF:
28026 case VOID_FTYPE_PV4DF_V4DI_V4DF:
28027 case VOID_FTYPE_PV4SF_V4SI_V4SF:
28028 case VOID_FTYPE_PV2DF_V2DI_V2DF:
28029 case VOID_FTYPE_PV8SI_V8SI_V8SI:
28030 case VOID_FTYPE_PV4DI_V4DI_V4DI:
28031 case VOID_FTYPE_PV4SI_V4SI_V4SI:
28032 case VOID_FTYPE_PV2DI_V2DI_V2DI:
28035 /* Reserve memory operand for target. */
28036 memory = ARRAY_SIZE (args);
28038 case VOID_FTYPE_UINT_UINT_UINT:
28039 case VOID_FTYPE_UINT64_UINT_UINT:
28040 case UCHAR_FTYPE_UINT_UINT_UINT:
28041 case UCHAR_FTYPE_UINT64_UINT_UINT:
28044 memory = ARRAY_SIZE (args);
28045 last_arg_constant = true;
28048 gcc_unreachable ();
28051 gcc_assert (nargs <= ARRAY_SIZE (args));
28053 if (klass == store)
28055 arg = CALL_EXPR_ARG (exp, 0);
28056 op = expand_normal (arg);
28057 gcc_assert (target == 0);
28060 if (GET_MODE (op) != Pmode)
28061 op = convert_to_mode (Pmode, op, 1);
28062 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
28065 target = force_reg (tmode, op);
28073 || GET_MODE (target) != tmode
28074 || !insn_p->operand[0].predicate (target, tmode))
28075 target = gen_reg_rtx (tmode);
28078 for (i = 0; i < nargs; i++)
28080 enum machine_mode mode = insn_p->operand[i + 1].mode;
28083 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
28084 op = expand_normal (arg);
28085 match = insn_p->operand[i + 1].predicate (op, mode);
28087 if (last_arg_constant && (i + 1) == nargs)
28091 if (icode == CODE_FOR_lwp_lwpvalsi3
28092 || icode == CODE_FOR_lwp_lwpinssi3
28093 || icode == CODE_FOR_lwp_lwpvaldi3
28094 || icode == CODE_FOR_lwp_lwpinsdi3)
28095 error ("the last argument must be a 32-bit immediate");
28097 error ("the last argument must be an 8-bit immediate");
28105 /* This must be the memory operand. */
28106 if (GET_MODE (op) != Pmode)
28107 op = convert_to_mode (Pmode, op, 1);
28108 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
28109 gcc_assert (GET_MODE (op) == mode
28110 || GET_MODE (op) == VOIDmode);
28114           /* This must be a register.  */
28115 if (VECTOR_MODE_P (mode))
28116 op = safe_vector_operand (op, mode);
28118 gcc_assert (GET_MODE (op) == mode
28119 || GET_MODE (op) == VOIDmode);
28120 op = copy_to_mode_reg (mode, op);
28125 args[i].mode = mode;
28131 pat = GEN_FCN (icode) (target);
28134 pat = GEN_FCN (icode) (target, args[0].op);
28137 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
28140 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
28143 gcc_unreachable ();
28149   return klass == store ? 0 : target;
28150 }
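/* Illustrative sketch: a typical "special" builtin handled above is a
   nontemporal store such as

     __builtin_ia32_movntps (p, v);     void (float *, v4sf)

   whose memory operand becomes the insn target (klass == store).  */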
28152 /* Return the integer constant in ARG. Constrain it to be in the range
28153 of the subparts of VEC_TYPE; issue an error if not. */
28155 static unsigned HOST_WIDE_INT
28156 get_element_number (tree vec_type, tree arg)
28157 {
28158 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
28160 if (!host_integerp (arg, 1)
28161       || (elt = tree_low_cst (arg, 1), elt > max))
28162     {
28163       error ("selector must be an integer constant in the range 0..%wi", max);
28164       return 0;
28165     }
28167   return elt;
28168 }
28170 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
28171 ix86_expand_vector_init. We DO have language-level syntax for this, in
28172 the form of (type){ init-list }. Except that since we can't place emms
28173 instructions from inside the compiler, we can't allow the use of MMX
28174 registers unless the user explicitly asks for it. So we do *not* define
28175 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
28176    we have builtins invoked by mmintrin.h that give us license to emit
28177 these sorts of instructions. */
28180 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
28182 enum machine_mode tmode = TYPE_MODE (type);
28183 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
28184 int i, n_elt = GET_MODE_NUNITS (tmode);
28185 rtvec v = rtvec_alloc (n_elt);
28187 gcc_assert (VECTOR_MODE_P (tmode));
28188 gcc_assert (call_expr_nargs (exp) == n_elt);
28190 for (i = 0; i < n_elt; ++i)
28192 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
28193 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
28196 if (!target || !register_operand (target, tmode))
28197 target = gen_reg_rtx (tmode);
28199   ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
28200   return target;
28201 }
28203 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
28204 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
28205 had a language-level syntax for referencing vector elements. */
28208 ix86_expand_vec_ext_builtin (tree exp, rtx target)
28210 enum machine_mode tmode, mode0;
28215 arg0 = CALL_EXPR_ARG (exp, 0);
28216 arg1 = CALL_EXPR_ARG (exp, 1);
28218 op0 = expand_normal (arg0);
28219 elt = get_element_number (TREE_TYPE (arg0), arg1);
28221 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
28222 mode0 = TYPE_MODE (TREE_TYPE (arg0));
28223 gcc_assert (VECTOR_MODE_P (mode0));
28225 op0 = force_reg (mode0, op0);
28227 if (optimize || !target || !register_operand (target, tmode))
28228 target = gen_reg_rtx (tmode);
28230   ix86_expand_vector_extract (true, target, op0, elt);
28231   return target;
28232 }
28235 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
28236 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
28237 a language-level syntax for referencing vector elements. */
28240 ix86_expand_vec_set_builtin (tree exp)
28242 enum machine_mode tmode, mode1;
28243 tree arg0, arg1, arg2;
28245 rtx op0, op1, target;
28247 arg0 = CALL_EXPR_ARG (exp, 0);
28248 arg1 = CALL_EXPR_ARG (exp, 1);
28249 arg2 = CALL_EXPR_ARG (exp, 2);
28251 tmode = TYPE_MODE (TREE_TYPE (arg0));
28252 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
28253 gcc_assert (VECTOR_MODE_P (tmode));
28255 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
28256 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
28257 elt = get_element_number (TREE_TYPE (arg0), arg2);
28259 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
28260 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
28262 op0 = force_reg (tmode, op0);
28263 op1 = force_reg (mode1, op1);
28265   /* OP0 is the source of these builtin functions and shouldn't be
28266      modified.  Create a copy, use it, and return it as the target.  */
28267 target = gen_reg_rtx (tmode);
28268 emit_move_insn (target, op0);
28269   ix86_expand_vector_set (true, target, op1, elt);
28270   return target;
28271 }
28274 /* Expand an expression EXP that calls a built-in function,
28275 with result going to TARGET if that's convenient
28276 (and in mode MODE if that's convenient).
28277 SUBTARGET may be used as the target for computing one of EXP's operands.
28278 IGNORE is nonzero if the value is to be ignored. */
28281 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
28282 enum machine_mode mode ATTRIBUTE_UNUSED,
28283 int ignore ATTRIBUTE_UNUSED)
28285 const struct builtin_description *d;
28287 enum insn_code icode;
28288 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
28289 tree arg0, arg1, arg2, arg3, arg4;
28290 rtx op0, op1, op2, op3, op4, pat;
28291 enum machine_mode mode0, mode1, mode2, mode3, mode4;
28292 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
28294 /* Determine whether the builtin function is available under the current ISA.
28295 Originally the builtin was not created if it wasn't applicable to the
28296 current ISA based on the command line switches. With function specific
28297 options, we need to check in the context of the function making the call
28298 whether it is supported. */
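  /* Illustrative sketch of the case this handles (assuming -mavx2 is
     not on the command line):

       __attribute__((target("avx2"))) void f (void)
       { ... __builtin_ia32_gathersiv4sf (...) ... }    OK: ISA active here

       void g (void)
       { ... __builtin_ia32_gathersiv4sf (...) ... }    error: needs isa option  */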
28299 if (ix86_builtins_isa[fcode].isa
28300 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
28302 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
28303 NULL, (enum fpmath_unit) 0, false);
28306 error ("%qE needs unknown isa option", fndecl);
28309 gcc_assert (opts != NULL);
28310 error ("%qE needs isa option %s", fndecl, opts);
28318 case IX86_BUILTIN_MASKMOVQ:
28319 case IX86_BUILTIN_MASKMOVDQU:
28320 icode = (fcode == IX86_BUILTIN_MASKMOVQ
28321 ? CODE_FOR_mmx_maskmovq
28322 : CODE_FOR_sse2_maskmovdqu);
28323 /* Note the arg order is different from the operand order. */
28324 arg1 = CALL_EXPR_ARG (exp, 0);
28325 arg2 = CALL_EXPR_ARG (exp, 1);
28326 arg0 = CALL_EXPR_ARG (exp, 2);
28327 op0 = expand_normal (arg0);
28328 op1 = expand_normal (arg1);
28329 op2 = expand_normal (arg2);
28330 mode0 = insn_data[icode].operand[0].mode;
28331 mode1 = insn_data[icode].operand[1].mode;
28332 mode2 = insn_data[icode].operand[2].mode;
28334 if (GET_MODE (op0) != Pmode)
28335 op0 = convert_to_mode (Pmode, op0, 1);
28336 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
28338 if (!insn_data[icode].operand[0].predicate (op0, mode0))
28339 op0 = copy_to_mode_reg (mode0, op0);
28340 if (!insn_data[icode].operand[1].predicate (op1, mode1))
28341 op1 = copy_to_mode_reg (mode1, op1);
28342 if (!insn_data[icode].operand[2].predicate (op2, mode2))
28343 op2 = copy_to_mode_reg (mode2, op2);
28344 pat = GEN_FCN (icode) (op0, op1, op2);
28350 case IX86_BUILTIN_LDMXCSR:
28351 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
28352 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
28353 emit_move_insn (target, op0);
28354 emit_insn (gen_sse_ldmxcsr (target));
28357 case IX86_BUILTIN_STMXCSR:
28358 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
28359 emit_insn (gen_sse_stmxcsr (target));
28360 return copy_to_mode_reg (SImode, target);
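    /* Illustrative usage sketch (the FTZ/DAZ mask value is an
       assumption shown for illustration): the two cases above give
       _mm_getcsr/_mm_setcsr style access to the MXCSR, e.g.

         unsigned csr = __builtin_ia32_stmxcsr ();
         __builtin_ia32_ldmxcsr (csr | 0x8040);    set FTZ (bit 15), DAZ (bit 6)  */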
28362 case IX86_BUILTIN_CLFLUSH:
28363 arg0 = CALL_EXPR_ARG (exp, 0);
28364 op0 = expand_normal (arg0);
28365 icode = CODE_FOR_sse2_clflush;
28366 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
28368 if (GET_MODE (op0) != Pmode)
28369 op0 = convert_to_mode (Pmode, op0, 1);
28370 op0 = force_reg (Pmode, op0);
28373 emit_insn (gen_sse2_clflush (op0));
28376 case IX86_BUILTIN_MONITOR:
28377 arg0 = CALL_EXPR_ARG (exp, 0);
28378 arg1 = CALL_EXPR_ARG (exp, 1);
28379 arg2 = CALL_EXPR_ARG (exp, 2);
28380 op0 = expand_normal (arg0);
28381 op1 = expand_normal (arg1);
28382 op2 = expand_normal (arg2);
28385 if (GET_MODE (op0) != Pmode)
28386 op0 = convert_to_mode (Pmode, op0, 1);
28387 op0 = force_reg (Pmode, op0);
28390 op1 = copy_to_mode_reg (SImode, op1);
28392 op2 = copy_to_mode_reg (SImode, op2);
28393 emit_insn (ix86_gen_monitor (op0, op1, op2));
28396 case IX86_BUILTIN_MWAIT:
28397 arg0 = CALL_EXPR_ARG (exp, 0);
28398 arg1 = CALL_EXPR_ARG (exp, 1);
28399 op0 = expand_normal (arg0);
28400 op1 = expand_normal (arg1);
28402 op0 = copy_to_mode_reg (SImode, op0);
28404 op1 = copy_to_mode_reg (SImode, op1);
28405 emit_insn (gen_sse3_mwait (op0, op1));
28408 case IX86_BUILTIN_VEC_INIT_V2SI:
28409 case IX86_BUILTIN_VEC_INIT_V4HI:
28410 case IX86_BUILTIN_VEC_INIT_V8QI:
28411 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
28413 case IX86_BUILTIN_VEC_EXT_V2DF:
28414 case IX86_BUILTIN_VEC_EXT_V2DI:
28415 case IX86_BUILTIN_VEC_EXT_V4SF:
28416 case IX86_BUILTIN_VEC_EXT_V4SI:
28417 case IX86_BUILTIN_VEC_EXT_V8HI:
28418 case IX86_BUILTIN_VEC_EXT_V2SI:
28419 case IX86_BUILTIN_VEC_EXT_V4HI:
28420 case IX86_BUILTIN_VEC_EXT_V16QI:
28421 return ix86_expand_vec_ext_builtin (exp, target);
28423 case IX86_BUILTIN_VEC_SET_V2DI:
28424 case IX86_BUILTIN_VEC_SET_V4SF:
28425 case IX86_BUILTIN_VEC_SET_V4SI:
28426 case IX86_BUILTIN_VEC_SET_V8HI:
28427 case IX86_BUILTIN_VEC_SET_V4HI:
28428 case IX86_BUILTIN_VEC_SET_V16QI:
28429 return ix86_expand_vec_set_builtin (exp);
28431 case IX86_BUILTIN_VEC_PERM_V2DF:
28432 case IX86_BUILTIN_VEC_PERM_V4SF:
28433 case IX86_BUILTIN_VEC_PERM_V2DI:
28434 case IX86_BUILTIN_VEC_PERM_V4SI:
28435 case IX86_BUILTIN_VEC_PERM_V8HI:
28436 case IX86_BUILTIN_VEC_PERM_V16QI:
28437 case IX86_BUILTIN_VEC_PERM_V2DI_U:
28438 case IX86_BUILTIN_VEC_PERM_V4SI_U:
28439 case IX86_BUILTIN_VEC_PERM_V8HI_U:
28440 case IX86_BUILTIN_VEC_PERM_V16QI_U:
28441 case IX86_BUILTIN_VEC_PERM_V4DF:
28442 case IX86_BUILTIN_VEC_PERM_V8SF:
28443 return ix86_expand_vec_perm_builtin (exp);
28445 case IX86_BUILTIN_INFQ:
28446 case IX86_BUILTIN_HUGE_VALQ:
28448 REAL_VALUE_TYPE inf;
28452 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
28454 tmp = validize_mem (force_const_mem (mode, tmp));
28457 target = gen_reg_rtx (mode);
28459 emit_move_insn (target, tmp);
28463 case IX86_BUILTIN_LLWPCB:
28464 arg0 = CALL_EXPR_ARG (exp, 0);
28465 op0 = expand_normal (arg0);
28466 icode = CODE_FOR_lwp_llwpcb;
28467 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
28469 if (GET_MODE (op0) != Pmode)
28470 op0 = convert_to_mode (Pmode, op0, 1);
28471 op0 = force_reg (Pmode, op0);
28473 emit_insn (gen_lwp_llwpcb (op0));
28476 case IX86_BUILTIN_SLWPCB:
28477 icode = CODE_FOR_lwp_slwpcb;
28479 || !insn_data[icode].operand[0].predicate (target, Pmode))
28480 target = gen_reg_rtx (Pmode);
28481 emit_insn (gen_lwp_slwpcb (target));
28484 case IX86_BUILTIN_BEXTRI32:
28485 case IX86_BUILTIN_BEXTRI64:
28486 arg0 = CALL_EXPR_ARG (exp, 0);
28487 arg1 = CALL_EXPR_ARG (exp, 1);
28488 op0 = expand_normal (arg0);
28489 op1 = expand_normal (arg1);
28490 icode = (fcode == IX86_BUILTIN_BEXTRI32
28491 ? CODE_FOR_tbm_bextri_si
28492 : CODE_FOR_tbm_bextri_di);
28493 if (!CONST_INT_P (op1))
28495 error ("last argument must be an immediate");
28500 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
28501 unsigned char lsb_index = INTVAL (op1) & 0xFF;
28502 op1 = GEN_INT (length);
28503 op2 = GEN_INT (lsb_index);
28504 pat = GEN_FCN (icode) (target, op0, op1, op2);
28510 case IX86_BUILTIN_RDRAND16_STEP:
28511 icode = CODE_FOR_rdrandhi_1;
28515 case IX86_BUILTIN_RDRAND32_STEP:
28516 icode = CODE_FOR_rdrandsi_1;
28520 case IX86_BUILTIN_RDRAND64_STEP:
28521 icode = CODE_FOR_rdranddi_1;
28525 op0 = gen_reg_rtx (mode0);
28526 emit_insn (GEN_FCN (icode) (op0));
28528 arg0 = CALL_EXPR_ARG (exp, 0);
28529 op1 = expand_normal (arg0);
28530 if (!address_operand (op1, VOIDmode))
28532 op1 = convert_memory_address (Pmode, op1);
28533 op1 = copy_addr_to_reg (op1);
28535 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
28537 op1 = gen_reg_rtx (SImode);
28538 emit_move_insn (op1, CONST1_RTX (SImode));
28540 /* Emit SImode conditional move. */
28541 if (mode0 == HImode)
28543 op2 = gen_reg_rtx (SImode);
28544 emit_insn (gen_zero_extendhisi2 (op2, op0));
28546 else if (mode0 == SImode)
28549 op2 = gen_rtx_SUBREG (SImode, op0, 0);
28552 target = gen_reg_rtx (SImode);
28554 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
28556 emit_insn (gen_rtx_SET (VOIDmode, target,
28557 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
28560 case IX86_BUILTIN_GATHERSIV2DF:
28561 icode = CODE_FOR_avx2_gathersiv2df;
28563 case IX86_BUILTIN_GATHERSIV4DF:
28564 icode = CODE_FOR_avx2_gathersiv4df;
28566 case IX86_BUILTIN_GATHERDIV2DF:
28567 icode = CODE_FOR_avx2_gatherdiv2df;
28569 case IX86_BUILTIN_GATHERDIV4DF:
28570 icode = CODE_FOR_avx2_gatherdiv4df;
28572 case IX86_BUILTIN_GATHERSIV4SF:
28573 icode = CODE_FOR_avx2_gathersiv4sf;
28575 case IX86_BUILTIN_GATHERSIV8SF:
28576 icode = CODE_FOR_avx2_gathersiv8sf;
28578 case IX86_BUILTIN_GATHERDIV4SF:
28579 icode = CODE_FOR_avx2_gatherdiv4sf;
28581 case IX86_BUILTIN_GATHERDIV8SF:
28582 icode = CODE_FOR_avx2_gatherdiv4sf256;
28584 case IX86_BUILTIN_GATHERSIV2DI:
28585 icode = CODE_FOR_avx2_gathersiv2di;
28587 case IX86_BUILTIN_GATHERSIV4DI:
28588 icode = CODE_FOR_avx2_gathersiv4di;
28590 case IX86_BUILTIN_GATHERDIV2DI:
28591 icode = CODE_FOR_avx2_gatherdiv2di;
28593 case IX86_BUILTIN_GATHERDIV4DI:
28594 icode = CODE_FOR_avx2_gatherdiv4di;
28596 case IX86_BUILTIN_GATHERSIV4SI:
28597 icode = CODE_FOR_avx2_gathersiv4si;
28599 case IX86_BUILTIN_GATHERSIV8SI:
28600 icode = CODE_FOR_avx2_gathersiv8si;
28602 case IX86_BUILTIN_GATHERDIV4SI:
28603 icode = CODE_FOR_avx2_gatherdiv4si;
28605 case IX86_BUILTIN_GATHERDIV8SI:
28606 icode = CODE_FOR_avx2_gatherdiv4si256;
28609 arg0 = CALL_EXPR_ARG (exp, 0);
28610 arg1 = CALL_EXPR_ARG (exp, 1);
28611 arg2 = CALL_EXPR_ARG (exp, 2);
28612 arg3 = CALL_EXPR_ARG (exp, 3);
28613 arg4 = CALL_EXPR_ARG (exp, 4);
28614 op0 = expand_normal (arg0);
28615 op1 = expand_normal (arg1);
28616 op2 = expand_normal (arg2);
28617 op3 = expand_normal (arg3);
28618 op4 = expand_normal (arg4);
28619 /* Note the arg order is different from the operand order. */
28620 mode0 = insn_data[icode].operand[1].mode;
28621 mode1 = insn_data[icode].operand[2].mode;
28622 mode2 = insn_data[icode].operand[3].mode;
28623 mode3 = insn_data[icode].operand[4].mode;
28624 mode4 = insn_data[icode].operand[5].mode;
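/* Illustrative reading of the mapping: arg 0 (the merge source)
   becomes operand 1, arg 1 (the base address, turned into a MEM
   below) operand 2, arg 2 (the index vector) operand 3, arg 3 (the
   mask) operand 4, and arg 4 (the scale, which must be 1, 2, 4 or 8)
   operand 5; operand 0 is the result.  */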
28626 if (target == NULL_RTX)
28627 target = gen_reg_rtx (insn_data[icode].operand[0].mode);
28629 /* Force memory operand only with base register here. But we
28630 don't want to do it on memory operand for other builtin functions.  */
28632 if (GET_MODE (op1) != Pmode)
28633 op1 = convert_to_mode (Pmode, op1, 1);
28634 op1 = force_reg (Pmode, op1);
28635 op1 = gen_rtx_MEM (mode1, op1);
28637 if (!insn_data[icode].operand[1].predicate (op0, mode0))
28638 op0 = copy_to_mode_reg (mode0, op0);
28639 if (!insn_data[icode].operand[2].predicate (op1, mode1))
28640 op1 = copy_to_mode_reg (mode1, op1);
28641 if (!insn_data[icode].operand[3].predicate (op2, mode2))
28642 op2 = copy_to_mode_reg (mode2, op2);
28643 if (!insn_data[icode].operand[4].predicate (op3, mode3))
28644 op3 = copy_to_mode_reg (mode3, op3);
28645 if (!insn_data[icode].operand[5].predicate (op4, mode4))
28647 error ("last argument must be scale 1, 2, 4, 8");
28650 pat = GEN_FCN (icode) (target, op0, op1, op2, op3, op4);
28660 for (i = 0, d = bdesc_special_args;
28661 i < ARRAY_SIZE (bdesc_special_args);
28663 if (d->code == fcode)
28664 return ix86_expand_special_args_builtin (d, exp, target);
28666 for (i = 0, d = bdesc_args;
28667 i < ARRAY_SIZE (bdesc_args);
28669 if (d->code == fcode)
28672 case IX86_BUILTIN_FABSQ:
28673 case IX86_BUILTIN_COPYSIGNQ:
28675 /* Emit a normal call if SSE2 isn't available. */
28676 return expand_call (exp, target, ignore);
28678 return ix86_expand_args_builtin (d, exp, target);
28681 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
28682 if (d->code == fcode)
28683 return ix86_expand_sse_comi (d, exp, target);
28685 for (i = 0, d = bdesc_pcmpestr;
28686 i < ARRAY_SIZE (bdesc_pcmpestr);
28688 if (d->code == fcode)
28689 return ix86_expand_sse_pcmpestr (d, exp, target);
28691 for (i = 0, d = bdesc_pcmpistr;
28692 i < ARRAY_SIZE (bdesc_pcmpistr);
28694 if (d->code == fcode)
28695 return ix86_expand_sse_pcmpistr (d, exp, target);
28697 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
28698 if (d->code == fcode)
28699 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
28700 (enum ix86_builtin_func_type)
28701 d->flag, d->comparison);
28703 gcc_unreachable ();
28706 /* Returns a function decl for a vectorized version of the builtin function
28707 with builtin function code FN and the result vector type TYPE, or NULL_TREE
28708 if it is not available. */
28711 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
28714 enum machine_mode in_mode, out_mode;
28716 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
28718 if (TREE_CODE (type_out) != VECTOR_TYPE
28719 || TREE_CODE (type_in) != VECTOR_TYPE
28720 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
28723 out_mode = TYPE_MODE (TREE_TYPE (type_out));
28724 out_n = TYPE_VECTOR_SUBPARTS (type_out);
28725 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28726 in_n = TYPE_VECTOR_SUBPARTS (type_in);
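/* For example, when vectorizing sqrt for a V2DF result, both modes
   are DFmode with out_n == in_n == 2, which the BUILT_IN_SQRT case
   below maps to IX86_BUILTIN_SQRTPD.  */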
28730 case BUILT_IN_SQRT:
28731 if (out_mode == DFmode && in_mode == DFmode)
28733 if (out_n == 2 && in_n == 2)
28734 return ix86_builtins[IX86_BUILTIN_SQRTPD];
28735 else if (out_n == 4 && in_n == 4)
28736 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
28740 case BUILT_IN_SQRTF:
28741 if (out_mode == SFmode && in_mode == SFmode)
28743 if (out_n == 4 && in_n == 4)
28744 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
28745 else if (out_n == 8 && in_n == 8)
28746 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
28750 case BUILT_IN_LRINT:
28751 if (out_mode == SImode && out_n == 4
28752 && in_mode == DFmode && in_n == 2)
28753 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
28756 case BUILT_IN_LRINTF:
28757 if (out_mode == SImode && in_mode == SFmode)
28759 if (out_n == 4 && in_n == 4)
28760 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
28761 else if (out_n == 8 && in_n == 8)
28762 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
28766 case BUILT_IN_COPYSIGN:
28767 if (out_mode == DFmode && in_mode == DFmode)
28769 if (out_n == 2 && in_n == 2)
28770 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
28771 else if (out_n == 4 && in_n == 4)
28772 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
28776 case BUILT_IN_COPYSIGNF:
28777 if (out_mode == SFmode && in_mode == SFmode)
28779 if (out_n == 4 && in_n == 4)
28780 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
28781 else if (out_n == 8 && in_n == 8)
28782 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
28786 case BUILT_IN_FLOOR:
28787 /* The round insn does not trap on denormals. */
28788 if (flag_trapping_math || !TARGET_ROUND)
28791 if (out_mode == DFmode && in_mode == DFmode)
28793 if (out_n == 2 && in_n == 2)
28794 return ix86_builtins[IX86_BUILTIN_FLOORPD];
28795 else if (out_n == 4 && in_n == 4)
28796 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
28800 case BUILT_IN_FLOORF:
28801 /* The round insn does not trap on denormals. */
28802 if (flag_trapping_math || !TARGET_ROUND)
28805 if (out_mode == SFmode && in_mode == SFmode)
28807 if (out_n == 4 && in_n == 4)
28808 return ix86_builtins[IX86_BUILTIN_FLOORPS];
28809 else if (out_n == 8 && in_n == 8)
28810 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
28814 case BUILT_IN_CEIL:
28815 /* The round insn does not trap on denormals. */
28816 if (flag_trapping_math || !TARGET_ROUND)
28819 if (out_mode == DFmode && in_mode == DFmode)
28821 if (out_n == 2 && in_n == 2)
28822 return ix86_builtins[IX86_BUILTIN_CEILPD];
28823 else if (out_n == 4 && in_n == 4)
28824 return ix86_builtins[IX86_BUILTIN_CEILPD256];
28828 case BUILT_IN_CEILF:
28829 /* The round insn does not trap on denormals. */
28830 if (flag_trapping_math || !TARGET_ROUND)
28833 if (out_mode == SFmode && in_mode == SFmode)
28835 if (out_n == 4 && in_n == 4)
28836 return ix86_builtins[IX86_BUILTIN_CEILPS];
28837 else if (out_n == 8 && in_n == 8)
28838 return ix86_builtins[IX86_BUILTIN_CEILPS256];
28842 case BUILT_IN_TRUNC:
28843 /* The round insn does not trap on denormals. */
28844 if (flag_trapping_math || !TARGET_ROUND)
28847 if (out_mode == DFmode && in_mode == DFmode)
28849 if (out_n == 2 && in_n == 2)
28850 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
28851 else if (out_n == 4 && in_n == 4)
28852 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
28856 case BUILT_IN_TRUNCF:
28857 /* The round insn does not trap on denormals. */
28858 if (flag_trapping_math || !TARGET_ROUND)
28861 if (out_mode == SFmode && in_mode == SFmode)
28863 if (out_n == 4 && in_n == 4)
28864 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
28865 else if (out_n == 8 && in_n == 8)
28866 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
28870 case BUILT_IN_RINT:
28871 /* The round insn does not trap on denormals. */
28872 if (flag_trapping_math || !TARGET_ROUND)
28875 if (out_mode == DFmode && in_mode == DFmode)
28877 if (out_n == 2 && in_n == 2)
28878 return ix86_builtins[IX86_BUILTIN_RINTPD];
28879 else if (out_n == 4 && in_n == 4)
28880 return ix86_builtins[IX86_BUILTIN_RINTPD256];
28884 case BUILT_IN_RINTF:
28885 /* The round insn does not trap on denormals. */
28886 if (flag_trapping_math || !TARGET_ROUND)
28889 if (out_mode == SFmode && in_mode == SFmode)
28891 if (out_n == 4 && in_n == 4)
28892 return ix86_builtins[IX86_BUILTIN_RINTPS];
28893 else if (out_n == 8 && in_n == 8)
28894 return ix86_builtins[IX86_BUILTIN_RINTPS256];
28898 case BUILT_IN_ROUND:
28899 /* The round insn does not trap on denormals. */
28900 if (flag_trapping_math || !TARGET_ROUND)
28903 if (out_mode == DFmode && in_mode == DFmode)
28905 if (out_n == 2 && in_n == 2)
28906 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
28907 else if (out_n == 4 && in_n == 4)
28908 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
28912 case BUILT_IN_ROUNDF:
28913 /* The round insn does not trap on denormals. */
28914 if (flag_trapping_math || !TARGET_ROUND)
28917 if (out_mode == SFmode && in_mode == SFmode)
28919 if (out_n == 4 && in_n == 4)
28920 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
28921 else if (out_n == 8 && in_n == 8)
28922 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
28927 if (out_mode == DFmode && in_mode == DFmode)
28929 if (out_n == 2 && in_n == 2)
28930 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
28931 if (out_n == 4 && in_n == 4)
28932 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
28936 case BUILT_IN_FMAF:
28937 if (out_mode == SFmode && in_mode == SFmode)
28939 if (out_n == 4 && in_n == 4)
28940 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
28941 if (out_n == 8 && in_n == 8)
28942 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
28950 /* Dispatch to a handler for a vectorization library. */
28951 if (ix86_veclib_handler)
28952 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
28958 /* Handler for an SVML-style interface to
28959 a library with vectorized intrinsics. */
28962 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
28965 tree fntype, new_fndecl, args;
28968 enum machine_mode el_mode, in_mode;
28971 /* The SVML is suitable for unsafe math only. */
28972 if (!flag_unsafe_math_optimizations)
28975 el_mode = TYPE_MODE (TREE_TYPE (type_out));
28976 n = TYPE_VECTOR_SUBPARTS (type_out);
28977 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28978 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28979 if (el_mode != in_mode
28987 case BUILT_IN_LOG10:
28989 case BUILT_IN_TANH:
28991 case BUILT_IN_ATAN:
28992 case BUILT_IN_ATAN2:
28993 case BUILT_IN_ATANH:
28994 case BUILT_IN_CBRT:
28995 case BUILT_IN_SINH:
28997 case BUILT_IN_ASINH:
28998 case BUILT_IN_ASIN:
28999 case BUILT_IN_COSH:
29001 case BUILT_IN_ACOSH:
29002 case BUILT_IN_ACOS:
29003 if (el_mode != DFmode || n != 2)
29007 case BUILT_IN_EXPF:
29008 case BUILT_IN_LOGF:
29009 case BUILT_IN_LOG10F:
29010 case BUILT_IN_POWF:
29011 case BUILT_IN_TANHF:
29012 case BUILT_IN_TANF:
29013 case BUILT_IN_ATANF:
29014 case BUILT_IN_ATAN2F:
29015 case BUILT_IN_ATANHF:
29016 case BUILT_IN_CBRTF:
29017 case BUILT_IN_SINHF:
29018 case BUILT_IN_SINF:
29019 case BUILT_IN_ASINHF:
29020 case BUILT_IN_ASINF:
29021 case BUILT_IN_COSHF:
29022 case BUILT_IN_COSF:
29023 case BUILT_IN_ACOSHF:
29024 case BUILT_IN_ACOSF:
29025 if (el_mode != SFmode || n != 4)
29033 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
29035 if (fn == BUILT_IN_LOGF)
29036 strcpy (name, "vmlsLn4");
29037 else if (fn == BUILT_IN_LOG)
29038 strcpy (name, "vmldLn2");
29041 sprintf (name, "vmls%s", bname+10);
29042 name[strlen (name)-1] = '4';
29045 sprintf (name, "vmld%s2", bname+10);
29047 /* Convert to uppercase. */
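/* Illustratively, for BUILT_IN_SINF: bname is "__builtin_sinf", so
   bname+10 skips the "__builtin_" prefix and the code above forms
   "vmlssin4"; uppercasing the first letter of the math function
   then produces the SVML entry point "vmlsSin4".  */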
29051 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
29052 args = TREE_CHAIN (args))
29056 fntype = build_function_type_list (type_out, type_in, NULL);
29058 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
29060 /* Build a function declaration for the vectorized function. */
29061 new_fndecl = build_decl (BUILTINS_LOCATION,
29062 FUNCTION_DECL, get_identifier (name), fntype);
29063 TREE_PUBLIC (new_fndecl) = 1;
29064 DECL_EXTERNAL (new_fndecl) = 1;
29065 DECL_IS_NOVOPS (new_fndecl) = 1;
29066 TREE_READONLY (new_fndecl) = 1;
29071 /* Handler for an ACML-style interface to
29072 a library with vectorized intrinsics. */
29075 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
29077 char name[20] = "__vr.._";
29078 tree fntype, new_fndecl, args;
29081 enum machine_mode el_mode, in_mode;
29084 /* The ACML is 64bit only and suitable for unsafe math only, as
29085 it does not correctly support parts of IEEE with the required
29086 precision, such as denormals. */
29088 || !flag_unsafe_math_optimizations)
29091 el_mode = TYPE_MODE (TREE_TYPE (type_out));
29092 n = TYPE_VECTOR_SUBPARTS (type_out);
29093 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29094 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29095 if (el_mode != in_mode
29105 case BUILT_IN_LOG2:
29106 case BUILT_IN_LOG10:
29109 if (el_mode != DFmode
29114 case BUILT_IN_SINF:
29115 case BUILT_IN_COSF:
29116 case BUILT_IN_EXPF:
29117 case BUILT_IN_POWF:
29118 case BUILT_IN_LOGF:
29119 case BUILT_IN_LOG2F:
29120 case BUILT_IN_LOG10F:
29123 if (el_mode != SFmode
29132 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
29133 sprintf (name + 7, "%s", bname+10);
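/* E.g. BUILT_IN_SIN yields "__vrd2_sin" and BUILT_IN_SINF yields
   "__vrs4_sinf", assuming the switch above filled in the mode
   letter and lane count at name[4] and name[5] ('d'/'2' for double,
   's'/'4' for float) as in the stock implementation.  */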
29136 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
29137 args = TREE_CHAIN (args))
29141 fntype = build_function_type_list (type_out, type_in, NULL);
29143 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
29145 /* Build a function declaration for the vectorized function. */
29146 new_fndecl = build_decl (BUILTINS_LOCATION,
29147 FUNCTION_DECL, get_identifier (name), fntype);
29148 TREE_PUBLIC (new_fndecl) = 1;
29149 DECL_EXTERNAL (new_fndecl) = 1;
29150 DECL_IS_NOVOPS (new_fndecl) = 1;
29151 TREE_READONLY (new_fndecl) = 1;
29157 /* Returns a decl of a function that implements conversion of an integer vector
29158 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
29159 are the types involved when converting according to CODE.
29160 Return NULL_TREE if it is not available. */
29163 ix86_vectorize_builtin_conversion (unsigned int code,
29164 tree dest_type, tree src_type)
29172 switch (TYPE_MODE (src_type))
29175 switch (TYPE_MODE (dest_type))
29178 return (TYPE_UNSIGNED (src_type)
29179 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
29180 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
29182 return (TYPE_UNSIGNED (src_type)
29184 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
29190 switch (TYPE_MODE (dest_type))
29193 return (TYPE_UNSIGNED (src_type)
29195 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
29204 case FIX_TRUNC_EXPR:
29205 switch (TYPE_MODE (dest_type))
29208 switch (TYPE_MODE (src_type))
29211 return (TYPE_UNSIGNED (dest_type)
29213 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
29215 return (TYPE_UNSIGNED (dest_type)
29217 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
29224 switch (TYPE_MODE (src_type))
29227 return (TYPE_UNSIGNED (dest_type)
29229 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
29246 /* Returns the decl of a target-specific builtin that implements the
29247 reciprocal of the function, or NULL_TREE if it is not available. */
29250 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
29251 bool sqrt ATTRIBUTE_UNUSED)
29253 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
29254 && flag_finite_math_only && !flag_trapping_math
29255 && flag_unsafe_math_optimizations))
29259 /* Machine dependent builtins. */
29262 /* Vectorized version of sqrt to rsqrt conversion. */
29263 case IX86_BUILTIN_SQRTPS_NR:
29264 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
29266 case IX86_BUILTIN_SQRTPS_NR256:
29267 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
29273 /* Normal builtins. */
29276 /* Sqrt to rsqrt conversion. */
29277 case BUILT_IN_SQRTF:
29278 return ix86_builtins[IX86_BUILTIN_RSQRTF];
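/* E.g. under the flag combination tested above (typically enabled
   by -ffast-math), a scalar 1.0f / sqrtf (x) can be expanded via
   IX86_BUILTIN_RSQRTF, i.e. rsqrtss, usually refined with a
   Newton-Raphson step.  */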
29285 /* Helper for avx_vpermilps256_operand et al. This is also used by
29286 the expansion functions to turn the parallel back into a mask.
29287 The return value is 0 for no match and the imm8+1 for a match. */
29290 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
29292 unsigned i, nelt = GET_MODE_NUNITS (mode);
29294 unsigned char ipar[8];
29296 if (XVECLEN (par, 0) != (int) nelt)
29299 /* Validate that all of the elements are constants, and not totally
29300 out of range. Copy the data into an integral array to make the
29301 subsequent checks easier. */
29302 for (i = 0; i < nelt; ++i)
29304 rtx er = XVECEXP (par, 0, i);
29305 unsigned HOST_WIDE_INT ei;
29307 if (!CONST_INT_P (er))
29318 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane.  */
29320 for (i = 0; i < 2; ++i)
29324 mask |= ipar[i] << i;
29326 for (i = 2; i < 4; ++i)
29330 mask |= (ipar[i] - 2) << i;
29335 /* In the 256-bit SFmode case, we have full freedom of movement
29336 within the low 128-bit lane, but the high 128-bit lane must
29337 mirror the exact same pattern. */
29338 for (i = 0; i < 4; ++i)
29339 if (ipar[i] + 4 != ipar[i + 4])
29346 /* In the 128-bit case, we've full freedom in the placement of
29347 the elements from the source operand. */
29348 for (i = 0; i < nelt; ++i)
29349 mask |= ipar[i] << (i * (nelt / 2));
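/* For instance, in V4SFmode the parallel [1 0 3 2] (swap within
   pairs) accumulates mask = 1<<0 | 0<<2 | 3<<4 | 2<<6 == 0xb1,
   so 0xb2 is returned.  */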
29353 gcc_unreachable ();
29356 /* Make sure success has a non-zero value by adding one. */
29360 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
29361 the expansion functions to turn the parallel back into a mask.
29362 The return value is 0 for no match and the imm8+1 for a match. */
29365 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
29367 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
29369 unsigned char ipar[8];
29371 if (XVECLEN (par, 0) != (int) nelt)
29374 /* Validate that all of the elements are constants, and not totally
29375 out of range. Copy the data into an integral array to make the
29376 subsequent checks easier. */
29377 for (i = 0; i < nelt; ++i)
29379 rtx er = XVECEXP (par, 0, i);
29380 unsigned HOST_WIDE_INT ei;
29382 if (!CONST_INT_P (er))
29385 if (ei >= 2 * nelt)
29390 /* Validate that each half of the permute consists of consecutive elements, i.e. that it really selects a contiguous half.  */
29391 for (i = 0; i < nelt2 - 1; ++i)
29392 if (ipar[i] + 1 != ipar[i + 1])
29394 for (i = nelt2; i < nelt - 1; ++i)
29395 if (ipar[i] + 1 != ipar[i + 1])
29398 /* Reconstruct the mask. */
29399 for (i = 0; i < 2; ++i)
29401 unsigned e = ipar[i * nelt2];
29405 mask |= e << (i * 4);
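/* Illustratively (assuming the elided lines reject e % nelt2 != 0
   and divide e by nelt2, as in the stock implementation): for
   V4DFmode the parallel [2 3 6 7] selects the high lane of each
   source, giving mask == 0x31 and a return value of 0x32.  */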
29408 /* Make sure success has a non-zero value by adding one. */
29413 /* Store OPERAND to the memory after reload is completed. This means
29414 that we can't easily use assign_stack_local. */
29416 ix86_force_to_memory (enum machine_mode mode, rtx operand)
29420 gcc_assert (reload_completed);
29421 if (ix86_using_red_zone ())
29423 result = gen_rtx_MEM (mode,
29424 gen_rtx_PLUS (Pmode,
29426 GEN_INT (-RED_ZONE_SIZE)));
29427 emit_move_insn (result, operand);
29429 else if (TARGET_64BIT)
29435 operand = gen_lowpart (DImode, operand);
29439 gen_rtx_SET (VOIDmode,
29440 gen_rtx_MEM (DImode,
29441 gen_rtx_PRE_DEC (DImode,
29442 stack_pointer_rtx)),
29446 gcc_unreachable ();
29448 result = gen_rtx_MEM (mode, stack_pointer_rtx);
29457 split_double_mode (mode, &operand, 1, operands, operands + 1);
29459 gen_rtx_SET (VOIDmode,
29460 gen_rtx_MEM (SImode,
29461 gen_rtx_PRE_DEC (Pmode,
29462 stack_pointer_rtx)),
29465 gen_rtx_SET (VOIDmode,
29466 gen_rtx_MEM (SImode,
29467 gen_rtx_PRE_DEC (Pmode,
29468 stack_pointer_rtx)),
29473 /* Store HImodes as SImodes. */
29474 operand = gen_lowpart (SImode, operand);
29478 gen_rtx_SET (VOIDmode,
29479 gen_rtx_MEM (GET_MODE (operand),
29480 gen_rtx_PRE_DEC (SImode,
29481 stack_pointer_rtx)),
29485 gcc_unreachable ();
29487 result = gen_rtx_MEM (mode, stack_pointer_rtx);
29492 /* Free the operand previously stored to memory by ix86_force_to_memory.  */
29494 ix86_free_from_memory (enum machine_mode mode)
29496 if (!ix86_using_red_zone ())
29500 if (mode == DImode || TARGET_64BIT)
29504 /* Use LEA to deallocate stack space. In peephole2 it will be converted
29505 to a pop or add instruction if registers are available. */
29506 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
29507 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
29512 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
29514 Put float CONST_DOUBLE in the constant pool instead of fp regs.
29515 QImode must go into class Q_REGS.
29516 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
29517 movdf to do mem-to-mem moves through integer regs. */
29520 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
29522 enum machine_mode mode = GET_MODE (x);
29524 /* We're only allowed to return a subclass of CLASS. Many of the
29525 following checks fail for NO_REGS, so eliminate that early. */
29526 if (regclass == NO_REGS)
29529 /* All classes can load zeros. */
29530 if (x == CONST0_RTX (mode))
29533 /* Force constants into memory if we are loading a (nonzero) constant into
29534 an MMX or SSE register. This is because there are no MMX/SSE instructions
29535 to load from a constant. */
29537 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
29540 /* Prefer SSE regs only, if we can use them for math. */
29541 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
29542 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
29544 /* Floating-point constants need more complex checks. */
29545 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
29547 /* General regs can load everything. */
29548 if (reg_class_subset_p (regclass, GENERAL_REGS))
29551 /* Floats can load 0 and 1 plus some others. Note that we eliminated
29552 zero above. We only want to wind up preferring 80387 registers if
29553 we plan on doing computation with them. */
29555 && standard_80387_constant_p (x) > 0)
29557 /* Limit class to non-sse. */
29558 if (regclass == FLOAT_SSE_REGS)
29560 if (regclass == FP_TOP_SSE_REGS)
29562 if (regclass == FP_SECOND_SSE_REGS)
29563 return FP_SECOND_REG;
29564 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
29571 /* Generally when we see PLUS here, it's the function invariant
29572 (plus soft-fp const_int), which can only be computed into general regs.  */
29574 if (GET_CODE (x) == PLUS)
29575 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
29577 /* QImode constants are easy to load, but non-constant QImode data
29578 must go into Q_REGS. */
29579 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
29581 if (reg_class_subset_p (regclass, Q_REGS))
29583 if (reg_class_subset_p (Q_REGS, regclass))
29591 /* Discourage putting floating-point values in SSE registers unless
29592 SSE math is being used, and likewise for the 387 registers. */
29594 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
29596 enum machine_mode mode = GET_MODE (x);
29598 /* Restrict the output reload class to the register bank that we are doing
29599 math on. If we would like not to return a subset of CLASS, reject this
29600 alternative: if reload cannot do this, it will still use its choice. */
29601 mode = GET_MODE (x);
29602 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
29603 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
29605 if (X87_FLOAT_MODE_P (mode))
29607 if (regclass == FP_TOP_SSE_REGS)
29609 else if (regclass == FP_SECOND_SSE_REGS)
29610 return FP_SECOND_REG;
29612 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
29619 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
29620 enum machine_mode mode, secondary_reload_info *sri)
29622 /* Double-word spills from general registers to non-offsettable memory
29623 references (zero-extended addresses) require special handling. */
29626 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
29627 && rclass == GENERAL_REGS
29628 && !offsettable_memref_p (x))
29631 ? CODE_FOR_reload_noff_load
29632 : CODE_FOR_reload_noff_store);
29633 /* Add the cost of moving address to a temporary. */
29634 sri->extra_cost = 1;
29639 /* QImode spills from non-QI registers require
29640 an intermediate register on 32bit targets. */
29642 && !in_p && mode == QImode
29643 && (rclass == GENERAL_REGS
29644 || rclass == LEGACY_REGS
29645 || rclass == INDEX_REGS))
29654 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
29655 regno = true_regnum (x);
29657 /* Return Q_REGS if the operand is in memory. */
29662 /* This condition handles the corner case where an expression involving
29663 pointers gets vectorized. We're trying to use the address of a
29664 stack slot as a vector initializer.
29666 (set (reg:V2DI 74 [ vect_cst_.2 ])
29667 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
29669 Eventually frame gets turned into sp+offset like this:
29671 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29672 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
29673 (const_int 392 [0x188]))))
29675 That later gets turned into:
29677 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29678 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
29679 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
29681 We'll have the following reload recorded:
29683 Reload 0: reload_in (DI) =
29684 (plus:DI (reg/f:DI 7 sp)
29685 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
29686 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29687 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
29688 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
29689 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29690 reload_reg_rtx: (reg:V2DI 22 xmm1)
29692 Which isn't going to work since SSE instructions can't handle scalar
29693 additions. Returning GENERAL_REGS forces the addition into integer
29694 register and reload can handle subsequent reloads without problems. */
29696 if (in_p && GET_CODE (x) == PLUS
29697 && SSE_CLASS_P (rclass)
29698 && SCALAR_INT_MODE_P (mode))
29699 return GENERAL_REGS;
29704 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
29707 ix86_class_likely_spilled_p (reg_class_t rclass)
29718 case SSE_FIRST_REG:
29720 case FP_SECOND_REG:
29730 /* If we are copying between general and FP registers, we need a memory
29731 location. The same is true for SSE and MMX registers.
29733 To optimize register_move_cost performance, allow inline variant.
29735 The macro can't work reliably when one of the CLASSES is a class containing
29736 registers from multiple units (SSE, MMX, integer). We avoid this by never
29737 combining those units in a single alternative in the machine description.
29738 Ensure that this constraint holds to avoid unexpected surprises.
29740 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
29741 enforce these sanity checks. */
29744 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
29745 enum machine_mode mode, int strict)
29747 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
29748 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
29749 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
29750 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
29751 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
29752 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
29754 gcc_assert (!strict);
29758 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
29761 /* ??? This is a lie. We do have moves between mmx/general, and for
29762 mmx/sse2. But by saying we need secondary memory we discourage the
29763 register allocator from using the mmx registers unless needed. */
29764 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
29767 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
29769 /* SSE1 doesn't have any direct moves from other classes. */
29773 /* If the target says that inter-unit moves are more expensive
29774 than moving through memory, then don't generate them. */
29775 if (!TARGET_INTER_UNIT_MOVES)
29778 /* Between SSE and general, we have moves no larger than word size. */
29779 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
29787 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
29788 enum machine_mode mode, int strict)
29790 return inline_secondary_memory_needed (class1, class2, mode, strict);
29793 /* Implement the TARGET_CLASS_MAX_NREGS hook.
29795 On the 80386, this is the size of MODE in words,
29796 except in the FP regs, where a single reg is always enough. */
29798 static unsigned char
29799 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
29801 if (MAYBE_INTEGER_CLASS_P (rclass))
29803 if (mode == XFmode)
29804 return (TARGET_64BIT ? 2 : 3);
29805 else if (mode == XCmode)
29806 return (TARGET_64BIT ? 4 : 6);
29808 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
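/* E.g. the word count above yields 2 registers for DImode on ia32:
   (8 + 4 - 1) / 4 == 2.  */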
29812 if (COMPLEX_MODE_P (mode))
29819 /* Return true if the registers in CLASS cannot represent the change from
29820 modes FROM to TO. */
29823 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
29824 enum reg_class regclass)
29829 /* x87 registers can't do subreg at all, as all values are reformatted
29830 to extended precision. */
29831 if (MAYBE_FLOAT_CLASS_P (regclass))
29834 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
29836 /* Vector registers do not support QI or HImode loads. If we don't
29837 disallow a change to these modes, reload will assume it's ok to
29838 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
29839 the vec_dupv4hi pattern. */
29840 if (GET_MODE_SIZE (from) < 4)
29843 /* Vector registers do not support subreg with nonzero offsets, which
29844 are otherwise valid for integer registers. Since we can't see
29845 whether we have a nonzero offset from here, prohibit all
29846 nonparadoxical subregs changing size. */
29847 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
29854 /* Return the cost of moving data of mode M between a
29855 register and memory. A value of 2 is the default; this cost is
29856 relative to those in `REGISTER_MOVE_COST'.
29858 This function is used extensively by register_move_cost that is used to
29859 build tables at startup. Make it inline in this case.
29860 When IN is 2, return maximum of in and out move cost.
29862 If moving between registers and memory is more expensive than
29863 between two registers, you should define this macro to express the
29866 Also model the increased cost of moving QImode registers in non-Q_REGS classes.  */
29870 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
29874 if (FLOAT_CLASS_P (regclass))
29892 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
29893 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
29895 if (SSE_CLASS_P (regclass))
29898 switch (GET_MODE_SIZE (mode))
29913 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
29914 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
29916 if (MMX_CLASS_P (regclass))
29919 switch (GET_MODE_SIZE (mode))
29931 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
29932 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
29934 switch (GET_MODE_SIZE (mode))
29937 if (Q_CLASS_P (regclass) || TARGET_64BIT)
29940 return ix86_cost->int_store[0];
29941 if (TARGET_PARTIAL_REG_DEPENDENCY
29942 && optimize_function_for_speed_p (cfun))
29943 cost = ix86_cost->movzbl_load;
29945 cost = ix86_cost->int_load[0];
29947 return MAX (cost, ix86_cost->int_store[0]);
29953 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
29955 return ix86_cost->movzbl_load;
29957 return ix86_cost->int_store[0] + 4;
29962 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
29963 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
29965 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
29966 if (mode == TFmode)
29969 cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]);
29971 cost = ix86_cost->int_load[2];
29973 cost = ix86_cost->int_store[2];
29974 return (cost * (((int) GET_MODE_SIZE (mode)
29975 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
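/* E.g. XFmode with the default 12-byte layout on ia32 needs
   (12 + 4 - 1) / 4 == 3 word moves, so three times the base cost.  */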
29980 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
29983 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
29987 /* Return the cost of moving data from a register in class CLASS1 to
29988 one in class CLASS2.
29990 It is not required that the cost always equal 2 when FROM is the same as TO;
29991 on some machines it is expensive to move between registers if they are not
29992 general registers. */
29995 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
29996 reg_class_t class2_i)
29998 enum reg_class class1 = (enum reg_class) class1_i;
29999 enum reg_class class2 = (enum reg_class) class2_i;
30001 /* In case we require secondary memory, compute cost of the store followed
30002 by load. In order to avoid bad register allocation choices, we need
30003 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
30005 if (inline_secondary_memory_needed (class1, class2, mode, 0))
30009 cost += inline_memory_move_cost (mode, class1, 2);
30010 cost += inline_memory_move_cost (mode, class2, 2);
30012 /* In case of copying from a general purpose register we may emit
30013 multiple stores followed by a single load, causing a memory size
30014 mismatch stall. Count this as an arbitrarily high cost of 20. */
30015 if (targetm.class_max_nregs (class1, mode)
30016 > targetm.class_max_nregs (class2, mode))
30019 /* In the case of FP/MMX moves, the registers actually overlap, and we
30020 have to switch modes in order to treat them differently. */
30021 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
30022 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
30028 /* Moves between SSE/MMX and integer unit are expensive. */
30029 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
30030 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
30032 /* ??? By keeping the returned value relatively high, we limit the number
30033 of moves between integer and MMX/SSE registers for all targets.
30034 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
30035 where integer modes in MMX/SSE registers are not tieable
30036 because of missing QImode and HImode moves to, from or between
30037 MMX/SSE registers. */
30038 return MAX (8, ix86_cost->mmxsse_to_integer);
30040 if (MAYBE_FLOAT_CLASS_P (class1))
30041 return ix86_cost->fp_move;
30042 if (MAYBE_SSE_CLASS_P (class1))
30043 return ix86_cost->sse_move;
30044 if (MAYBE_MMX_CLASS_P (class1))
30045 return ix86_cost->mmx_move;
30049 /* Return TRUE if hard register REGNO can hold a value of machine-mode
30053 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
30055 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
30056 if (CC_REGNO_P (regno))
30057 return GET_MODE_CLASS (mode) == MODE_CC;
30058 if (GET_MODE_CLASS (mode) == MODE_CC
30059 || GET_MODE_CLASS (mode) == MODE_RANDOM
30060 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
30062 if (FP_REGNO_P (regno))
30063 return VALID_FP_MODE_P (mode);
30064 if (SSE_REGNO_P (regno))
30066 /* We implement the move patterns for all vector modes into and
30067 out of SSE registers, even when no operation instructions
30068 are available. OImode move is available only when AVX is
30070 return ((TARGET_AVX && mode == OImode)
30071 || VALID_AVX256_REG_MODE (mode)
30072 || VALID_SSE_REG_MODE (mode)
30073 || VALID_SSE2_REG_MODE (mode)
30074 || VALID_MMX_REG_MODE (mode)
30075 || VALID_MMX_REG_MODE_3DNOW (mode));
30077 if (MMX_REGNO_P (regno))
30079 /* We implement the move patterns for 3DNOW modes even in MMX mode,
30080 so if the register is available at all, then we can move data of
30081 the given mode into or out of it. */
30082 return (VALID_MMX_REG_MODE (mode)
30083 || VALID_MMX_REG_MODE_3DNOW (mode));
30086 if (mode == QImode)
30088 /* Take care with QImode values: they can be in non-QI regs,
30089 but then they do cause partial register stalls. */
30090 if (regno <= BX_REG || TARGET_64BIT)
30092 if (!TARGET_PARTIAL_REG_STALL)
30094 return !can_create_pseudo_p ();
30096 /* We handle both integer and floats in the general purpose registers. */
30097 else if (VALID_INT_MODE_P (mode))
30099 else if (VALID_FP_MODE_P (mode))
30101 else if (VALID_DFP_MODE_P (mode))
30103 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
30104 on to use that value in smaller contexts, this can easily force a
30105 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
30106 supporting DImode, allow it. */
30107 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
30113 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
30114 tieable integer mode. */
30117 ix86_tieable_integer_mode_p (enum machine_mode mode)
30126 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
30129 return TARGET_64BIT;
30136 /* Return true if MODE1 is accessible in a register that can hold MODE2
30137 without copying. That is, all register classes that can hold MODE2
30138 can also hold MODE1. */
30141 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
30143 if (mode1 == mode2)
30146 if (ix86_tieable_integer_mode_p (mode1)
30147 && ix86_tieable_integer_mode_p (mode2))
30150 /* MODE2 being XFmode implies fp stack or general regs, which means we
30151 can tie any smaller floating point modes to it. Note that we do not
30152 tie this with TFmode. */
30153 if (mode2 == XFmode)
30154 return mode1 == SFmode || mode1 == DFmode;
30156 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
30157 that we can tie it with SFmode. */
30158 if (mode2 == DFmode)
30159 return mode1 == SFmode;
30161 /* If MODE2 is only appropriate for an SSE register, then tie with
30162 any other mode acceptable to SSE registers. */
30163 if (GET_MODE_SIZE (mode2) == 16
30164 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
30165 return (GET_MODE_SIZE (mode1) == 16
30166 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
30168 /* If MODE2 is appropriate for an MMX register, then tie
30169 with any other mode acceptable to MMX registers. */
30170 if (GET_MODE_SIZE (mode2) == 8
30171 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
30172 return (GET_MODE_SIZE (mode1) == 8
30173 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
30178 /* Compute a (partial) cost for rtx X. Return true if the complete
30179 cost has been computed, and false if subexpressions should be
30180 scanned. In either case, *TOTAL contains the cost result. */
30183 ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
30186 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
30187 enum machine_mode mode = GET_MODE (x);
30188 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
30196 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
30198 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
30200 else if (flag_pic && SYMBOLIC_CONST (x)
30202 || (GET_CODE (x) != LABEL_REF
30203 && (GET_CODE (x) != SYMBOL_REF
30204 || !SYMBOL_REF_LOCAL_P (x)))))
30211 if (mode == VOIDmode)
30214 switch (standard_80387_constant_p (x))
30219 default: /* Other constants */
30224 /* Start with (MEM (SYMBOL_REF)), since that's where
30225 it'll probably end up. Add a penalty for size. */
30226 *total = (COSTS_N_INSNS (1)
30227 + (flag_pic != 0 && !TARGET_64BIT)
30228 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
30234 /* Zero extension is often completely free on x86_64, so make
30235 it as cheap as possible. */
30236 if (TARGET_64BIT && mode == DImode
30237 && GET_MODE (XEXP (x, 0)) == SImode)
30239 else if (TARGET_ZERO_EXTEND_WITH_AND)
30240 *total = cost->add;
30242 *total = cost->movzx;
30246 *total = cost->movsx;
30250 if (CONST_INT_P (XEXP (x, 1))
30251 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
30253 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
30256 *total = cost->add;
30259 if ((value == 2 || value == 3)
30260 && cost->lea <= cost->shift_const)
30262 *total = cost->lea;
30272 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
30274 if (CONST_INT_P (XEXP (x, 1)))
30276 if (INTVAL (XEXP (x, 1)) > 32)
30277 *total = cost->shift_const + COSTS_N_INSNS (2);
30279 *total = cost->shift_const * 2;
30283 if (GET_CODE (XEXP (x, 1)) == AND)
30284 *total = cost->shift_var * 2;
30286 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
30291 if (CONST_INT_P (XEXP (x, 1)))
30292 *total = cost->shift_const;
30294 *total = cost->shift_var;
30302 gcc_assert (FLOAT_MODE_P (mode));
30303 gcc_assert (TARGET_FMA || TARGET_FMA4);
30305 /* ??? SSE scalar/vector cost should be used here. */
30306 /* ??? Bald assumption that fma has the same cost as fmul. */
30307 *total = cost->fmul;
30308 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
30310 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
30312 if (GET_CODE (sub) == NEG)
30313 sub = XEXP (sub, 0);
30314 *total += rtx_cost (sub, FMA, 0, speed);
30317 if (GET_CODE (sub) == NEG)
30318 sub = XEXP (sub, 0);
30319 *total += rtx_cost (sub, FMA, 2, speed);
30324 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30326 /* ??? SSE scalar cost should be used here. */
30327 *total = cost->fmul;
30330 else if (X87_FLOAT_MODE_P (mode))
30332 *total = cost->fmul;
30335 else if (FLOAT_MODE_P (mode))
30337 /* ??? SSE vector cost should be used here. */
30338 *total = cost->fmul;
30343 rtx op0 = XEXP (x, 0);
30344 rtx op1 = XEXP (x, 1);
30346 if (CONST_INT_P (XEXP (x, 1)))
30348 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
30349 for (nbits = 0; value != 0; value &= value - 1)
30353 /* This is arbitrary. */
30356 /* Compute costs correctly for widening multiplication. */
30357 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
30358 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
30359 == GET_MODE_SIZE (mode))
30361 int is_mulwiden = 0;
30362 enum machine_mode inner_mode = GET_MODE (op0);
30364 if (GET_CODE (op0) == GET_CODE (op1))
30365 is_mulwiden = 1, op1 = XEXP (op1, 0);
30366 else if (CONST_INT_P (op1))
30368 if (GET_CODE (op0) == SIGN_EXTEND)
30369 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
30372 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
30376 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
30379 *total = (cost->mult_init[MODE_INDEX (mode)]
30380 + nbits * cost->mult_bit
30381 + rtx_cost (op0, outer_code, opno, speed)
30382 + rtx_cost (op1, outer_code, opno, speed));
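/* E.g. for x * 10 (binary 1010, two set bits) the loop above gives
   nbits == 2, so the cost is mult_init plus 2 * mult_bit plus the
   costs of the two operands.  */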
30391 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30392 /* ??? SSE cost should be used here. */
30393 *total = cost->fdiv;
30394 else if (X87_FLOAT_MODE_P (mode))
30395 *total = cost->fdiv;
30396 else if (FLOAT_MODE_P (mode))
30397 /* ??? SSE vector cost should be used here. */
30398 *total = cost->fdiv;
30400 *total = cost->divide[MODE_INDEX (mode)];
30404 if (GET_MODE_CLASS (mode) == MODE_INT
30405 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
30407 if (GET_CODE (XEXP (x, 0)) == PLUS
30408 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
30409 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
30410 && CONSTANT_P (XEXP (x, 1)))
30412 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
30413 if (val == 2 || val == 4 || val == 8)
30415 *total = cost->lea;
30416 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
30417 outer_code, opno, speed);
30418 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
30419 outer_code, opno, speed);
30420 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
30424 else if (GET_CODE (XEXP (x, 0)) == MULT
30425 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
30427 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
30428 if (val == 2 || val == 4 || val == 8)
30430 *total = cost->lea;
30431 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
30432 outer_code, opno, speed);
30433 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
30437 else if (GET_CODE (XEXP (x, 0)) == PLUS)
30439 *total = cost->lea;
30440 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
30441 outer_code, opno, speed);
30442 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
30443 outer_code, opno, speed);
30444 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
30451 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30453 /* ??? SSE cost should be used here. */
30454 *total = cost->fadd;
30457 else if (X87_FLOAT_MODE_P (mode))
30459 *total = cost->fadd;
30462 else if (FLOAT_MODE_P (mode))
30464 /* ??? SSE vector cost should be used here. */
30465 *total = cost->fadd;
30473 if (!TARGET_64BIT && mode == DImode)
30475 *total = (cost->add * 2
30476 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
30477 << (GET_MODE (XEXP (x, 0)) != DImode))
30478 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
30479 << (GET_MODE (XEXP (x, 1)) != DImode)));
30485 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30487 /* ??? SSE cost should be used here. */
30488 *total = cost->fchs;
30491 else if (X87_FLOAT_MODE_P (mode))
30493 *total = cost->fchs;
30496 else if (FLOAT_MODE_P (mode))
30498 /* ??? SSE vector cost should be used here. */
30499 *total = cost->fchs;
30505 if (!TARGET_64BIT && mode == DImode)
30506 *total = cost->add * 2;
30508 *total = cost->add;
30512 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
30513 && XEXP (XEXP (x, 0), 1) == const1_rtx
30514 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
30515 && XEXP (x, 1) == const0_rtx)
30517 /* This kind of construct is implemented using test[bwl].
30518 Treat it as if we had an AND. */
30519 *total = (cost->add
30520 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
30521 + rtx_cost (const1_rtx, outer_code, opno, speed));
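/* E.g. a single-bit test such as x & (1 << 5) reaches here as
   (compare (zero_extract x 1 5) 0) and is costed like the and/test
   it will become.  */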
30527 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
30532 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30533 /* ??? SSE cost should be used here. */
30534 *total = cost->fabs;
30535 else if (X87_FLOAT_MODE_P (mode))
30536 *total = cost->fabs;
30537 else if (FLOAT_MODE_P (mode))
30538 /* ??? SSE vector cost should be used here. */
30539 *total = cost->fabs;
30543 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30544 /* ??? SSE cost should be used here. */
30545 *total = cost->fsqrt;
30546 else if (X87_FLOAT_MODE_P (mode))
30547 *total = cost->fsqrt;
30548 else if (FLOAT_MODE_P (mode))
30549 /* ??? SSE vector cost should be used here. */
30550 *total = cost->fsqrt;
30554 if (XINT (x, 1) == UNSPEC_TP)
30561 case VEC_DUPLICATE:
30562 /* ??? Assume all of these vector manipulation patterns are
30563 recognizable; in which case they all pretty much have the same cost.  */
30565 *total = COSTS_N_INSNS (1);
30575 static int current_machopic_label_num;
30577 /* Given a symbol name and its associated stub, write out the
30578 definition of the stub. */
30581 machopic_output_stub (FILE *file, const char *symb, const char *stub)
30583 unsigned int length;
30584 char *binder_name, *symbol_name, lazy_ptr_name[32];
30585 int label = ++current_machopic_label_num;
30587 /* For 64-bit we shouldn't get here. */
30588 gcc_assert (!TARGET_64BIT);
30590 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
30591 symb = targetm.strip_name_encoding (symb);
30593 length = strlen (stub);
30594 binder_name = XALLOCAVEC (char, length + 32);
30595 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
30597 length = strlen (symb);
30598 symbol_name = XALLOCAVEC (char, length + 32);
30599 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
30601 sprintf (lazy_ptr_name, "L%d$lz", label);
30603 if (MACHOPIC_ATT_STUB)
30604 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
30605 else if (MACHOPIC_PURE)
30606 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
30608 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
30610 fprintf (file, "%s:\n", stub);
30611 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
30613 if (MACHOPIC_ATT_STUB)
30615 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
30617 else if (MACHOPIC_PURE)
30620 /* 25-byte PIC stub using "CALL get_pc_thunk". */
30621 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
30622 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
30623 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
30624 label, lazy_ptr_name, label);
30625 fprintf (file, "\tjmp\t*%%ecx\n");
30628 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
30630 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
30631 it needs no stub-binding-helper. */
30632 if (MACHOPIC_ATT_STUB)
30635 fprintf (file, "%s:\n", binder_name);
30639 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
30640 fprintf (file, "\tpushl\t%%ecx\n");
30643 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
30645 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
30647 /* N.B. Keep the correspondence of these
30648 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
30649 old-pic/new-pic/non-pic stubs; altering this will break
30650 compatibility with existing dylibs. */
30653 /* 25-byte PIC stub using "CALL get_pc_thunk". */
30654 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
30657 /* 16-byte -mdynamic-no-pic stub. */
30658 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
30660 fprintf (file, "%s:\n", lazy_ptr_name);
30661 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
30662 fprintf (file, ASM_LONG "%s\n", binder_name);
30664 #endif /* TARGET_MACHO */
30666 /* Order the registers for the register allocator.  */
30669 x86_order_regs_for_local_alloc (void)
30674 /* First allocate the local general purpose registers. */
30675 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
30676 if (GENERAL_REGNO_P (i) && call_used_regs[i])
30677 reg_alloc_order [pos++] = i;
30679 /* Global general purpose registers. */
30680 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
30681 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
30682 reg_alloc_order [pos++] = i;
30684 /* x87 registers come first in case we are doing FP math using them.  */
30686 if (!TARGET_SSE_MATH)
30687 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
30688 reg_alloc_order [pos++] = i;
30690 /* SSE registers. */
30691 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
30692 reg_alloc_order [pos++] = i;
30693 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
30694 reg_alloc_order [pos++] = i;
30696 /* x87 registers. */
30697 if (TARGET_SSE_MATH)
30698 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
30699 reg_alloc_order [pos++] = i;
30701 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
30702 reg_alloc_order [pos++] = i;
30704 /* Initialize the rest of the array, as we do not allocate some registers at all.  */
30706 while (pos < FIRST_PSEUDO_REGISTER)
30707 reg_alloc_order [pos++] = 0;
30710 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
30711 in struct attribute_spec.handler.  */
30713 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
30715 int flags ATTRIBUTE_UNUSED,
30716 bool *no_add_attrs)
30718 if (TREE_CODE (*node) != FUNCTION_TYPE
30719 && TREE_CODE (*node) != METHOD_TYPE
30720 && TREE_CODE (*node) != FIELD_DECL
30721 && TREE_CODE (*node) != TYPE_DECL)
30723 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30725 *no_add_attrs = true;
30730 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
30732 *no_add_attrs = true;
30735 if (is_attribute_p ("callee_pop_aggregate_return", name))
30739 cst = TREE_VALUE (args);
30740 if (TREE_CODE (cst) != INTEGER_CST)
30742 warning (OPT_Wattributes,
30743 "%qE attribute requires an integer constant argument",
30745 *no_add_attrs = true;
30747 else if (compare_tree_int (cst, 0) != 0
30748 && compare_tree_int (cst, 1) != 0)
30750 warning (OPT_Wattributes,
30751 "argument to %qE attribute is neither zero, nor one",
30753 *no_add_attrs = true;
30762 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
30763 struct attribute_spec.handler. */
30765 ix86_handle_abi_attribute (tree *node, tree name,
30766 tree args ATTRIBUTE_UNUSED,
30767 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30769 if (TREE_CODE (*node) != FUNCTION_TYPE
30770 && TREE_CODE (*node) != METHOD_TYPE
30771 && TREE_CODE (*node) != FIELD_DECL
30772 && TREE_CODE (*node) != TYPE_DECL)
30774 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30776 *no_add_attrs = true;
30780 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
30781 if (is_attribute_p ("ms_abi", name))
30783 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
30785 error ("ms_abi and sysv_abi attributes are not compatible");
30790 else if (is_attribute_p ("sysv_abi", name))
30792 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
30794 error ("ms_abi and sysv_abi attributes are not compatible");
30803 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
30804 struct attribute_spec.handler. */
30806 ix86_handle_struct_attribute (tree *node, tree name,
30807 tree args ATTRIBUTE_UNUSED,
30808 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30811 if (DECL_P (*node))
30813 if (TREE_CODE (*node) == TYPE_DECL)
30814 type = &TREE_TYPE (*node);
30819 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
30820 || TREE_CODE (*type) == UNION_TYPE)))
30822 warning (OPT_Wattributes, "%qE attribute ignored",
30824 *no_add_attrs = true;
30827 else if ((is_attribute_p ("ms_struct", name)
30828 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
30829 || ((is_attribute_p ("gcc_struct", name)
30830 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
30832 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
30834 *no_add_attrs = true;
30841 ix86_handle_fndecl_attribute (tree *node, tree name,
30842 tree args ATTRIBUTE_UNUSED,
30843 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30845 if (TREE_CODE (*node) != FUNCTION_DECL)
30847 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30849 *no_add_attrs = true;
30855 ix86_ms_bitfield_layout_p (const_tree record_type)
30857 return ((TARGET_MS_BITFIELD_LAYOUT
30858 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
30859 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
30862 /* Returns an expression indicating where the this parameter is
30863 located on entry to the FUNCTION. */
30866 x86_this_parameter (tree function)
30868 tree type = TREE_TYPE (function);
30869 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
30874 const int *parm_regs;
30876 if (ix86_function_type_abi (type) == MS_ABI)
30877 parm_regs = x86_64_ms_abi_int_parameter_registers;
30879 parm_regs = x86_64_int_parameter_registers;
30880 return gen_rtx_REG (DImode, parm_regs[aggr]);
30883 nregs = ix86_function_regparm (type, function);
30885 if (nregs > 0 && !stdarg_p (type))
30888 unsigned int ccvt = ix86_get_callcvt (type);
30890 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
30891 regno = aggr ? DX_REG : CX_REG;
30892 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
30896 return gen_rtx_MEM (SImode,
30897 plus_constant (stack_pointer_rtx, 4));
30906 return gen_rtx_MEM (SImode,
30907 plus_constant (stack_pointer_rtx, 4));
30910 return gen_rtx_REG (SImode, regno);
30913 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
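/* I.e. on ia32 the this pointer is normally found at 4(%esp) on
   entry (just past the return address), or at 8(%esp) when a hidden
   aggregate-return pointer occupies the first stack slot.  */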
30916 /* Determine whether x86_output_mi_thunk can succeed. */
30919 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
30920 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
30921 HOST_WIDE_INT vcall_offset, const_tree function)
30923 /* 64-bit can handle anything. */
30927 /* For 32-bit, everything's fine if we have one free register. */
30928 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
30931 /* Need a free register for vcall_offset. */
30935 /* Need a free register for GOT references. */
30936 if (flag_pic && !targetm.binds_local_p (function))
30939 /* Otherwise ok. */
30943 /* Output the assembler code for a thunk function. THUNK_DECL is the
30944 declaration for the thunk function itself, FUNCTION is the decl for
30945 the target function. DELTA is an immediate constant offset to be
30946 added to THIS. If VCALL_OFFSET is nonzero, the word at
30947 *(*this + vcall_offset) should be added to THIS. */
static void
x86_output_mi_thunk (FILE *file,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, R10_REG);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;
      unsigned int tmp_regno;

      if (TARGET_64BIT)
	tmp_regno = R10_REG;
      else
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	    tmp_regno = AX_REG;
	  else
	    tmp_regno = CX_REG;
	}
      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || cfun->machine->call_abi == MS_ABI)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    emit_jump_insn (gen_indirect_jump (fnaddr));
  else
    {
      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  tmp = get_insns ();
  insn_locators_alloc ();
  shorten_branches (tmp);
  final_start_function (tmp, file, 1);
  final (tmp, file, 1);
  final_end_function ();
}
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
					 : MCOUNT_NAME);

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (DEFAULT_ABI == SYSV_ABI && flag_pic)
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	fprintf (file, "\tcall\t%s\n", mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t%s\n", mcount_name);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   the vast majority of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (JUMP_TABLE_DATA_P (insn))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16 byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16 byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
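  /* Worked example (an illustration, not from the original sources):
     if the window already holds 3 jumps and NBYTES reaches 14 once a
     2-byte fourth jump is counted, then 14 < 16 means all four jumps
     could share one 16 byte page, so a pad of 15 - 14 + 2 = 3 bytes is
     emitted to push the fourth jump into the next 16 byte window.  */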
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start)
		       && GET_CODE (PATTERN (start)) != ADDR_VEC
		       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
		      || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn)
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* This only happens in exit blocks.  */
      if (JUMP_P (insn)
	  && ANY_RETURN_P (PATTERN (insn)))
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR)
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR)
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* Run the vzeroupper optimization if needed.  */
  if (TARGET_VZEROUPPER)
    move_or_delete_vzeroupper ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) > BX_REG)
      return true;
  return false;
}

/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
		       extended_reg_mentioned_1, NULL);
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case SImode:
    case HImode:
    case QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
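/* Illustrative example (not from the original sources): with val == 128,
   "addl $128, %eax" needs a 32-bit immediate, while the negated
   "subl $-128, %eax" fits the sign-extended 8-bit immediate form and
   saves three bytes.  */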
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
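/* A sketch of the negative path above (illustrative, not from the
   original sources): for an input x with the sign bit set, viewed as
   unsigned, (x >> 1) | (x & 1) halves the value while keeping the low
   bit sticky for correct rounding; converting that to FP and doubling
   it reproduces (FP) x.  */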
/* AVX does not support 32-byte integer vector operations,
   thus the longest vector we are faced with is V16QImode.  */
#define MAX_VECT_LEN	16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool testing_p;
};

static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
static int extract_vec_perm_cst (struct expand_vec_perm_d *, tree);
static bool ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
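/* For example (an illustrative note, not from the original sources):
   get_mode_wider_vector (V8HImode) yields V4SImode -- the same 16
   bytes, but with half as many elements, each twice as wide.  */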
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case V2SImode: case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V4DFmode: case V4DImode: case V8SFmode: case V8SImode:
    case V2DFmode: case V2DImode: case V4SFmode: case V4SImode:
      {
	rtx insn, dup;

	/* First attempt to recognize VAL as-is.  */
	dup = gen_rtx_VEC_DUPLICATE (mode, val);
	insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
	if (recog_memoized (insn) < 0)
	  {
	    rtx seq;
	    /* If that fails, force VAL into a register.  */

	    start_sequence ();
	    XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
	    seq = get_insns ();
	    end_sequence ();
	    if (seq)
	      emit_insn_before (seq, insn);

	    ok = recog_memoized (insn) >= 0;
	    gcc_assert (ok);
	  }
      }
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  rtx x;

	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      goto widen;

    case V8QImode:
      if (!mmx_ok)
	return false;
      goto widen;

    case V8HImode:
    case V16QImode:
      if (TARGET_SSE2)
	{
	  struct expand_vec_perm_d dperm;
	  rtx tmp1, tmp2;

	  memset (&dperm, 0, sizeof (dperm));
	  dperm.target = target;
	  dperm.vmode = mode;
	  dperm.nelt = GET_MODE_NUNITS (mode);
	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);

	  /* Extend to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));

	  /* Insert the SImode value as low element of a V4SImode vector.  */
	  tmp2 = gen_lowpart (V4SImode, dperm.op0);
	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));

	  ok = (expand_vec_perm_1 (&dperm)
		|| expand_vec_perm_broadcast_1 (&dperm));
	  gcc_assert (ok);
	  return ok;
	}
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      {
	enum machine_mode smode, wsmode, wvmode;
	rtx x;

	smode = GET_MODE_INNER (mode);
	wvmode = get_mode_wider_vector (mode);
	wsmode = GET_MODE_INNER (wvmode);

	val = convert_modes (wsmode, smode, val, true);
	x = expand_simple_binop (wsmode, ASHIFT, val,
				 GEN_INT (GET_MODE_BITSIZE (smode)),
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

	x = gen_lowpart (wvmode, target);
	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
	gcc_assert (ok);
	return ok;
      }

    case V16HImode:
    case V32QImode:
      {
	enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
	rtx x = gen_reg_rtx (hvmode);

	ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
	gcc_assert (ok);

	x = gen_rtx_VEC_CONCAT (mode, x, x);
	emit_insn (gen_rtx_SET (VOIDmode, target, x));
      }
      return true;

    default:
      return false;
    }
}
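/* Illustration of the "widen" strategy above (not from the original
   sources): to splat a QImode value v into V16QImode without SSE2, the
   code first forms the HImode value (v << 8) | v and then recurses to
   splat that into V8HImode; every byte of the result is v.  */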
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
	 element is zero and inter-unit moves are OK, we use movq
	 instead.  */
      use_vector_set = (TARGET_64BIT
			&& TARGET_SSE4_1
			&& !(TARGET_INTER_UNIT_MOVES
			     && one_var == 0));
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V8SFmode:
    case V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    const1_rtx,
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
					  const1_rtx,
					  GEN_INT (one_var == 1 ? 0 : 1),
					  GEN_INT (one_var == 2 ? 0+4 : 1+4),
					  GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
	return false;
    case V4DFmode:
    case V8SFmode:
    case V8SImode:
    case V16HImode:
    case V32QImode:
    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      if (TARGET_SSE4_1)
	break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
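      /* E.g. (an illustration, not from the original sources): for
	 ONE_VAR == 5 whose even neighbor is the constant c at index 4,
	 this builds the HImode value (var << 8) | c and stores it at
	 HImode index 2 (== 5 >> 1).  */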
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
				rtx target, rtx *ops, int n)
{
  enum machine_mode cmode, hmode = VOIDmode;
  rtx first[8], second[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
	{
	case V8SImode: cmode = V4SImode; break;
	case V8SFmode: cmode = V4SFmode; break;
	case V4DImode: cmode = V2DImode; break;
	case V4DFmode: cmode = V2DFmode; break;
	case V4SImode: cmode = V2SImode; break;
	case V4SFmode: cmode = V2SFmode; break;
	case V2DImode: cmode = DImode; break;
	case V2SImode: cmode = SImode; break;
	case V2DFmode: cmode = DFmode; break;
	case V2SFmode: cmode = SFmode; break;
	default:
	  gcc_unreachable ();
	}

      if (!register_operand (ops[1], cmode))
	ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
	ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, ops[0],
						  ops[1])));
      break;

    case 4:
      switch (mode)
	{
	case V4DImode: cmode = V2DImode; break;
	case V4DFmode: cmode = V2DFmode; break;
	case V4SImode: cmode = V2SImode; break;
	case V4SFmode: cmode = V2SFmode; break;
	default:
	  gcc_unreachable ();
	}
      goto half;

    case 8:
      switch (mode)
	{
	case V8SImode: cmode = V2SImode; hmode = V4SImode; break;
	case V8SFmode: cmode = V2SFmode; hmode = V4SFmode; break;
	default:
	  gcc_unreachable ();
	}
      goto half;

half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
	{
	  first[j] = gen_reg_rtx (cmode);
	  v = gen_rtvec (2, ops[i - 1], ops[i]);
	  ix86_expand_vector_init (false, first[j],
				   gen_rtx_PARALLEL (cmode, v));
	}

      n >>= 1;
      if (n > 2)
	{
	  gcc_assert (hmode != VOIDmode);
	  for (i = j = 0; i < n; i += 2, j++)
	    {
	      second[j] = gen_reg_rtx (hmode);
	      ix86_expand_vector_init_concat (hmode, second [j],
					      &first [i], 2);
	    }
	  n >>= 1;
	  ix86_expand_vector_init_concat (mode, target, second, n);
	}
      else
	ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
				    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
			       gen_rtx_VEC_DUPLICATE (V4SImode,
						      op0),
			       CONST0_RTX (V4SImode),
			       const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
				force_reg (inner_mode,
					   ops [i + i + 1]),
				const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
	{
	  op0 = gen_reg_rtx (second_imode);
	  emit_insn (gen_interleave_second_low (op0, ops[i],
						ops[i + 1]));

	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
	     vector.  */
	  ops[j] = gen_reg_rtx (third_imode);
	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
	}
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
					    ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
	 mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  rtx ops[32], op0, op1;
  enum machine_mode half_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V32QImode:
      half_mode = V16QImode;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      goto half;

half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
					  n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
					  &ops [n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
	break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
	break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
	 move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES)
	break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  {
    int i, j, n_elts, n_words, n_elt_per_word;
    enum machine_mode inner_mode;
    rtx words[4], shift;

    inner_mode = GET_MODE_INNER (mode);
    n_elts = GET_MODE_NUNITS (mode);
    n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
    n_elt_per_word = n_elts / n_words;
    shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

    for (i = 0; i < n_words; ++i)
      {
	rtx word = NULL_RTX;

	for (j = 0; j < n_elt_per_word; ++j)
	  {
	    rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	    elt = convert_modes (word_mode, inner_mode, elt, true);

	    if (j == 0)
	      word = elt;
	    else
	      {
		word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					    word, 1, OPTAB_LIB_WIDEN);
		word = expand_simple_binop (word_mode, IOR, word, elt,
					    word, 1, OPTAB_LIB_WIDEN);
	      }
	  }

	words[i] = word;
      }

    if (n_words == 1)
      emit_move_insn (target, gen_lowpart (mode, words[0]));
    else if (n_words == 2)
      {
	rtx tmp = gen_reg_rtx (mode);
	emit_clobber (tmp);
	emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	emit_move_insn (target, tmp);
      }
    else if (n_words == 4)
      {
	rtx tmp = gen_reg_rtx (V4SImode);
	gcc_assert (word_mode == SImode);
	vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	emit_move_insn (target, gen_lowpart (mode, tmp));
      }
    else
      gcc_unreachable ();
  }
}
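/* A note on the word loop above (illustrative, not from the original
   sources): elements are accumulated highest index first, each IOR
   preceded by a shift, so element 0 lands in the least significant
   bits of the word -- matching little-endian element layout when the
   word is reinterpreted as a vector.  */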
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  enum machine_mode half_mode;
  bool use_vec_merge = false;
  rtx tmp;
  static rtx (*gen_extract[6][2]) (rtx, rtx)
    = {
	{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
	{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
	{ gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
	{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
	{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
	{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
      };
  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
    = {
	{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
	{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
	{ gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
	{ gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
	{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
	{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
      };
  int i, j, n;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
      if (use_vec_merge)
	break;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      if (elt == 0)
	tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      else
	tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      return;

    case V2DFmode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D  */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const1_rtx, const0_rtx,
					  GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */
	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V8QImode:
      break;

    case V32QImode:
      half_mode = V16QImode;
      j = 0;
      n = 16;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      j = 1;
      n = 8;
      goto half;

    case V8SImode:
      half_mode = V4SImode;
      j = 2;
      n = 4;
      goto half;

    case V4DImode:
      half_mode = V2DImode;
      j = 3;
      n = 2;
      goto half;

    case V8SFmode:
      half_mode = V4SFmode;
      j = 4;
      n = 4;
      goto half;

    case V4DFmode:
      half_mode = V2DFmode;
      j = 5;
      n = 2;
      goto half;

half:
      /* Compute offset.  */
      i = elt / n;
      elt %= n;

      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      /* Put it back.  */
      emit_insn (gen_insert[j][i] (target, target, tmp));
      return;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
					  GEN_INT (elt), GEN_INT (elt),
					  GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8SFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SFmode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DFmode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V32QImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V16QImode);
	  if (elt < 16)
	    emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
	  return;
	}
      break;

    case V16HImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V8HImode);
	  if (elt < 8)
	    emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 7);
	  return;
	}
      break;

    case V8SImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SImode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DImode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3, tmp4, tmp5;
  enum machine_mode mode = GET_MODE (in);
  int i;

  tmp1 = gen_reg_rtx (mode);
  tmp2 = gen_reg_rtx (mode);
  tmp3 = gen_reg_rtx (mode);

  switch (mode)
    {
    case V4SFmode:
      emit_insn (gen_sse_movhlps (tmp1, in, in));
      emit_insn (fn (tmp2, tmp1, in));
      emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
				      const1_rtx, const1_rtx,
				      GEN_INT (1+4), GEN_INT (1+4)));
      break;
    case V8SFmode:
      tmp4 = gen_reg_rtx (mode);
      tmp5 = gen_reg_rtx (mode);
      emit_insn (gen_avx_vperm2f128v8sf3 (tmp4, in, in, const1_rtx));
      emit_insn (fn (tmp5, tmp4, in));
      emit_insn (gen_avx_shufps256 (tmp1, tmp5, tmp5, GEN_INT (2+12)));
      emit_insn (fn (tmp2, tmp1, tmp5));
      emit_insn (gen_avx_shufps256 (tmp3, tmp2, tmp2, const1_rtx));
      break;
    case V4DFmode:
      emit_insn (gen_avx_vperm2f128v4df3 (tmp1, in, in, const1_rtx));
      emit_insn (fn (tmp2, tmp1, in));
      emit_insn (gen_avx_shufpd256 (tmp3, tmp2, tmp2, const1_rtx));
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, tmp1),
				    gen_lowpart (V4DImode, in),
				    gen_lowpart (V4DImode, in),
				    const1_rtx));
      tmp4 = in;
      tmp5 = tmp1;
      for (i = 64; i >= GET_MODE_BITSIZE (GET_MODE_INNER (mode)); i >>= 1)
	{
	  if (i != 64)
	    {
	      tmp2 = gen_reg_rtx (mode);
	      tmp3 = gen_reg_rtx (mode);
	    }
	  emit_insn (fn (tmp2, tmp4, tmp5));
	  emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, tmp3),
					 gen_lowpart (V2TImode, tmp2),
					 GEN_INT (i / 2)));
	  tmp4 = tmp2;
	  tmp5 = tmp3;
	}
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (fn (dest, tmp2, tmp3));
}
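/* Illustration for the V4SFmode case above (not from the original
   sources): with IN = {a,b,c,d} and FN an add pattern, movhlps yields
   a vector whose low elements are {c,d}, the first FN gives
   {a+c,b+d,...}, the shufps splats element 1 (b+d), and the final FN
   leaves a+b+c+d in element 0 of DEST.  */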
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}

/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  return clobbers;
}

/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  else
	    return "fst%Z0\t%y0";
	}
    }
  else
    gcc_unreachable ();
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
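/* A note on the dispatch above (illustrative, not from the original
   sources): fyl2xp1 computes y * log2 (x + 1) accurately only for
   |x| < 1 - sqrt(2)/2 ~= 0.2929, which is exactly the threshold tested
   against; outside that range the code falls back to fyl2x on 1 + x.
   In both paths y is fldln2 (log_e 2), turning the base-2 logarithm
   into a natural one.  */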
/* Emit code for round calculation.  */
void ix86_emit_i387_round (rtx op0, rtx op1)
{
  enum machine_mode inmode = GET_MODE (op1);
  enum machine_mode outmode = GET_MODE (op0);
  rtx e1, e2, res, tmp, tmp1, half;
  rtx scratch = gen_reg_rtx (HImode);
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx jump_label = gen_label_rtx ();
  rtx insn;
  rtx (*gen_abs) (rtx, rtx);
  rtx (*gen_neg) (rtx, rtx);

  switch (inmode)
    {
    case SFmode:
      gen_abs = gen_abssf2;
      break;
    case DFmode:
      gen_abs = gen_absdf2;
      break;
    case XFmode:
      gen_abs = gen_absxf2;
      break;
    default:
      gcc_unreachable ();
    }

  switch (outmode)
    {
    case SFmode:
      gen_neg = gen_negsf2;
      break;
    case DFmode:
      gen_neg = gen_negdf2;
      break;
    case XFmode:
      gen_neg = gen_negxf2;
      break;
    case HImode:
      gen_neg = gen_neghi2;
      break;
    case SImode:
      gen_neg = gen_negsi2;
      break;
    case DImode:
      gen_neg = gen_negdi2;
      break;
    default:
      gcc_unreachable ();
    }

  e1 = gen_reg_rtx (inmode);
  e2 = gen_reg_rtx (inmode);
  res = gen_reg_rtx (outmode);

  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);

  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
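  /* E.g. (an illustration, not from the original sources):
     round (-2.5) = -1 * floor (fabs (-2.5) + 0.5) = -floor (3.0) = -3,
     i.e. halfway cases round away from zero, unlike the
     round-to-even behavior of rint.  */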
  /* scratch = fxam(op1) */
  emit_insn (gen_rtx_SET (VOIDmode, scratch,
			  gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
					  UNSPEC_FXAM)));
  /* e1 = fabs(op1) */
  emit_insn (gen_abs (e1, op1));

  /* e2 = e1 + 0.5 */
  half = force_reg (inmode, half);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (inmode, e1, half)));

  /* res = floor(e2) */
  if (inmode != XFmode)
    {
      tmp1 = gen_reg_rtx (XFmode);

      emit_insn (gen_rtx_SET (VOIDmode, tmp1,
			      gen_rtx_FLOAT_EXTEND (XFmode, e2)));
    }
  else
    tmp1 = e2;

  switch (outmode)
    {
    case SFmode:
    case DFmode:
      {
	rtx tmp0 = gen_reg_rtx (XFmode);

	emit_insn (gen_frndintxf2_floor (tmp0, tmp1));

	emit_insn (gen_rtx_SET (VOIDmode, res,
				gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
						UNSPEC_TRUNC_NOOP)));
      }
      break;
    case XFmode:
      emit_insn (gen_frndintxf2_floor (res, tmp1));
      break;
    case HImode:
      emit_insn (gen_lfloorxfhi2 (res, tmp1));
      break;
    case SImode:
      emit_insn (gen_lfloorxfsi2 (res, tmp1));
      break;
    case DImode:
      emit_insn (gen_lfloorxfdi2 (res, tmp1));
      break;
    default:
      gcc_unreachable ();
    }

  /* flags = signbit(a) */
  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));

  /* if (flags) then res = -res */
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
			      gen_rtx_EQ (VOIDmode, flags, const0_rtx),
			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
			      pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = jump_label;

  emit_insn (gen_neg (res, res));

  emit_label (jump_label);
  LABEL_NUSES (jump_label) = 1;

  emit_move_insn (op0, res);
}
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
					  UNSPEC_RCP)));
  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, b)));

  /* e0 = x0 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, e0)));

  /* e1 = x0 + x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_PLUS (mode, x0, x0)));

  /* x1 = e1 - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
			  gen_rtx_MINUS (mode, e1, e0)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, a, x1)));
}
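/* Derivation of the formula above (illustrative, not from the original
   sources): with r = rcp(b), one Newton-Raphson step for 1/b refines r
   to r * (2 - b*r) = (r + r) - (b * r * r), which is exactly the
   e1 - e0 computed here; the final multiply by a then approximates
   a / b to roughly twice the precision of the rcpss estimate.  */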
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
			 bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
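  /* Derivation (illustrative, not from the original sources): one
     Newton-Raphson step for 1/sqrt(a) maps x to x * (3 - a*x*x) / 2;
     factoring out -0.5 gives the -0.5 * x * (a*x*x - 3) form used
     below, and multiplying by a (folded into e0) turns the refined
     rsqrt into sqrt.  */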
  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
					  UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX(mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
			      gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
			      gen_rtx_AND (mode, x0, mask)));
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, e2, e3)));
}
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}

/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit.  */
static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      enum machine_mode vmode;

      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
      else
	vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
	{
	  /* We need to generate a scalar mode mask in this case.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
	}
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
			  gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
			  gen_rtx_IOR (mode, abs_value, sgn)));
}
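/* In effect (an illustrative note, not from the original sources):
   result = abs_value | (sign & SIGNBIT), where SIGNBIT is the lone
   sign bit of the mode, e.g. the 0x80000000 bit pattern for SFmode.  */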
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;
  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
			  gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
				  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
			      bool swap_operands)
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
		   gen_rtx_fmt_ee (code, mode, op0, op1)));
  return mask;
}
33706 /* Generate and return a rtx of mode MODE for 2**n where n is the number
33707 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
33709 ix86_gen_TWO52 (enum machine_mode mode)
33711 REAL_VALUE_TYPE TWO52r;
33714 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
33715 TWO52 = const_double_from_real_value (TWO52r, mode);
33716 TWO52 = force_reg (mode, TWO52);
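/* Editor's note (illustration, not from the original source): 2**52 is
   the constant of the add/subtract rounding trick used by the expanders
   below.  For any double xa with 0 <= xa < 2**52,

     xa + 0x1p52 - 0x1p52

   is xa rounded to an integer in the current rounding mode, because at
   magnitude 2**52 the unit in the last place of a double is exactly 1.0.
   E.g. under round-to-nearest-even, 3.7 becomes 4.0 and 2.5 becomes 2.0.
   For SFmode the analogous constant is 2**23.  */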
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
33724 ix86_expand_lround (rtx op0, rtx op1)
33726 /* C code for the stuff we're doing below:
	tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
	return (long)tmp;
   */
33730 enum machine_mode mode = GET_MODE (op1);
33731 const struct real_format *fmt;
33732 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
33735 /* load nextafter (0.5, 0.0) */
33736 fmt = REAL_MODE_FORMAT (mode);
33737 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
33738 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
33740 /* adj = copysign (0.5, op1) */
33741 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
33742 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
33744 /* adj = op1 + adj */
33745 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
33747 /* op0 = (imode)adj */
33748 expand_fix (op0, adj, 0);
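/* Editor's note (rationale, not stated in the original source): plain 0.5
   cannot be used as the adjustment.  For the largest double below 0.5,
   x + 0.5 rounds up to 1.0, so lround would return 1 instead of 0.
   nextafter (0.5, 0.0) == 0.5 - 2**-54 for DFmode avoids that double
   rounding, and is exactly what the real_2expN/REAL_ARITHMETIC pair
   above computes.  */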
/* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
   into OP0.  */
33754 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
/* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	return xi;
   */
33761 enum machine_mode fmode = GET_MODE (op1);
33762 enum machine_mode imode = GET_MODE (op0);
33763 rtx ireg, freg, label, tmp;
33765 /* reg = (long)op1 */
33766 ireg = gen_reg_rtx (imode);
33767 expand_fix (ireg, op1, 0);
33769 /* freg = (double)reg */
33770 freg = gen_reg_rtx (fmode);
33771 expand_float (freg, ireg, 0);
33773 /* ireg = (freg > op1) ? ireg - 1 : ireg */
33774 label = ix86_expand_sse_compare_and_jump (UNLE,
33775 freg, op1, !do_floor);
33776 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
33777 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
33778 emit_move_insn (ireg, tmp);
33780 emit_label (label);
33781 LABEL_NUSES (label) = 1;
33783 emit_move_insn (op0, ireg);
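/* Editor's sketch (not part of the original source) of the scalar
   algorithm, assuming |op1| is small enough that the conversion does
   not overflow:

     long
     sketch_lfloor (double x)
     {
       long i = (long) x;	cvttsd2si truncates toward zero
       if ((double) i > x)	true only for negative non-integers
	 i -= 1;
       return i;
     }

   lceil is the mirror image: the comparison operands are swapped
   (the !do_floor path above) and 1 is added instead.  */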
33786 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
33787 result in OPERAND0. */
33789 ix86_expand_rint (rtx operand0, rtx operand1)
33791 /* C code for the stuff we're doing below:
33792 xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
33798 enum machine_mode mode = GET_MODE (operand0);
33799 rtx res, xa, label, TWO52, mask;
33801 res = gen_reg_rtx (mode);
33802 emit_move_insn (res, operand1);
33804 /* xa = abs (operand1) */
33805 xa = ix86_expand_sse_fabs (res, &mask);
33807 /* if (!isless (xa, TWO52)) goto label; */
33808 TWO52 = ix86_gen_TWO52 (mode);
33809 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33811 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
33812 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
33814 ix86_sse_copysign_to_positive (res, xa, res, mask);
33816 emit_label (label);
33817 LABEL_NUSES (label) = 1;
33819 emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
33825 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
33827 /* C code for the stuff we expand below.
33828 double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate: for floor, if x2 > x then x2 -= 1; for ceil, if x2 < x
     then x2 += 1.
   */
33841 enum machine_mode mode = GET_MODE (operand0);
33842 rtx xa, TWO52, tmp, label, one, res, mask;
33844 TWO52 = ix86_gen_TWO52 (mode);
33846 /* Temporary for holding the result, initialized to the input
33847 operand to ease control flow. */
33848 res = gen_reg_rtx (mode);
33849 emit_move_insn (res, operand1);
33851 /* xa = abs (operand1) */
33852 xa = ix86_expand_sse_fabs (res, &mask);
33854 /* if (!isless (xa, TWO52)) goto label; */
33855 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33857 /* xa = xa + TWO52 - TWO52; */
33858 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
33859 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
33861 /* xa = copysign (xa, operand1) */
33862 ix86_sse_copysign_to_positive (xa, xa, res, mask);
33864 /* generate 1.0 or -1.0 */
33865 one = force_reg (mode,
33866 const_double_from_real_value (do_floor
33867 ? dconst1 : dconstm1, mode));
33869 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
33870 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
33871 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33872 gen_rtx_AND (mode, one, tmp)));
33873 /* We always need to subtract here to preserve signed zero. */
33874 tmp = expand_simple_binop (mode, MINUS,
33875 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
33876 emit_move_insn (res, tmp);
33878 emit_label (label);
33879 LABEL_NUSES (label) = 1;
33881 emit_move_insn (operand0, res);
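/* Editor's note (illustration, not from the original source): the
   compensation above is branch-free.  The UNGT compare produces an
   all-ones or all-zero mask, so

     tmp = (xa > x ? ~0 : 0) & 1.0;	i.e. 1.0 or +0.0
     xa  = xa - tmp;

   and for ceil the masked constant is -1.0, so the operation is still a
   subtraction, preserving the sign of zero as the comment above notes.  */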
33884 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
33887 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
33889 /* C code for the stuff we expand below.
33890 double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate: for floor, if x2 > x then x2 -= 1; for ceil, if x2 < x
     then x2 += 1.
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
33904 enum machine_mode mode = GET_MODE (operand0);
33905 rtx xa, xi, TWO52, tmp, label, one, res, mask;
33907 TWO52 = ix86_gen_TWO52 (mode);
33909 /* Temporary for holding the result, initialized to the input
33910 operand to ease control flow. */
33911 res = gen_reg_rtx (mode);
33912 emit_move_insn (res, operand1);
33914 /* xa = abs (operand1) */
33915 xa = ix86_expand_sse_fabs (res, &mask);
33917 /* if (!isless (xa, TWO52)) goto label; */
33918 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33920 /* xa = (double)(long)x */
33921 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
33922 expand_fix (xi, res, 0);
33923 expand_float (xa, xi, 0);
33926 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
33928 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
33929 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
33930 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33931 gen_rtx_AND (mode, one, tmp)));
33932 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
33933 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
33934 emit_move_insn (res, tmp);
33936 if (HONOR_SIGNED_ZEROS (mode))
33937 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
33939 emit_label (label);
33940 LABEL_NUSES (label) = 1;
33942 emit_move_insn (operand0, res);
33945 /* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0, using a sequence that works without relying on DImode
   truncation via cvttsd2siq, which is only available on 64-bit targets.  */
33949 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
33951 /* C code for the stuff we expand below.
33952 double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
     Compensate.
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
	else if (dxa > 0.5)
	  xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;
   */
33967 enum machine_mode mode = GET_MODE (operand0);
33968 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
33970 TWO52 = ix86_gen_TWO52 (mode);
33972 /* Temporary for holding the result, initialized to the input
33973 operand to ease control flow. */
33974 res = gen_reg_rtx (mode);
33975 emit_move_insn (res, operand1);
33977 /* xa = abs (operand1) */
33978 xa = ix86_expand_sse_fabs (res, &mask);
33980 /* if (!isless (xa, TWO52)) goto label; */
33981 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33983 /* xa2 = xa + TWO52 - TWO52; */
33984 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
33985 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
33987 /* dxa = xa2 - xa; */
33988 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
33990 /* generate 0.5, 1.0 and -0.5 */
33991 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
33992 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
33993 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
33997 tmp = gen_reg_rtx (mode);
33998 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
33999 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
34000 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34001 gen_rtx_AND (mode, one, tmp)));
34002 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
34003 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
34004 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
34005 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34006 gen_rtx_AND (mode, one, tmp)));
34007 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
34009 /* res = copysign (xa2, operand1) */
34010 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
34012 emit_label (label);
34013 LABEL_NUSES (label) = 1;
34015 emit_move_insn (operand0, res);
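/* Editor's note (rationale, not stated in the original source): under
   round-to-nearest-even the TWO52 step never misses the true value by
   more than 0.5, so only the dxa <= -0.5 tie case needs the +1 fixup;
   the dxa > 0.5 branch additionally makes the sequence correct when the
   MXCSR rounding mode is not the default.  The net effect is
   half-away-from-zero rounding of xa, with the sign of x copied back
   afterwards.  */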
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
34021 ix86_expand_trunc (rtx operand0, rtx operand1)
34023 /* C code for SSE variant we expand below.
34024 double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
34032 enum machine_mode mode = GET_MODE (operand0);
34033 rtx xa, xi, TWO52, label, res, mask;
34035 TWO52 = ix86_gen_TWO52 (mode);
34037 /* Temporary for holding the result, initialized to the input
34038 operand to ease control flow. */
34039 res = gen_reg_rtx (mode);
34040 emit_move_insn (res, operand1);
34042 /* xa = abs (operand1) */
34043 xa = ix86_expand_sse_fabs (res, &mask);
34045 /* if (!isless (xa, TWO52)) goto label; */
34046 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34048 /* x = (double)(long)x */
34049 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
34050 expand_fix (xi, res, 0);
34051 expand_float (res, xi, 0);
34053 if (HONOR_SIGNED_ZEROS (mode))
34054 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
34056 emit_label (label);
34057 LABEL_NUSES (label) = 1;
34059 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
34065 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
34067 enum machine_mode mode = GET_MODE (operand0);
34068 rtx xa, mask, TWO52, label, one, res, smask, tmp;
34070 /* C code for SSE variant we expand below.
34071 double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
   */
34082 TWO52 = ix86_gen_TWO52 (mode);
34084 /* Temporary for holding the result, initialized to the input
34085 operand to ease control flow. */
34086 res = gen_reg_rtx (mode);
34087 emit_move_insn (res, operand1);
34089 /* xa = abs (operand1) */
34090 xa = ix86_expand_sse_fabs (res, &smask);
34092 /* if (!isless (xa, TWO52)) goto label; */
34093 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34095 /* res = xa + TWO52 - TWO52; */
34096 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
34097 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
34098 emit_move_insn (res, tmp);
34101 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
34103 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
34104 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
34105 emit_insn (gen_rtx_SET (VOIDmode, mask,
34106 gen_rtx_AND (mode, mask, one)));
34107 tmp = expand_simple_binop (mode, MINUS,
34108 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
34109 emit_move_insn (res, tmp);
34111 /* res = copysign (res, operand1) */
34112 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
34114 emit_label (label);
34115 LABEL_NUSES (label) = 1;
34117 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
34123 ix86_expand_round (rtx operand0, rtx operand1)
34125 /* C code for the stuff we're doing below:
34126 double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
34132 enum machine_mode mode = GET_MODE (operand0);
34133 rtx res, TWO52, xa, label, xi, half, mask;
34134 const struct real_format *fmt;
34135 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
34137 /* Temporary for holding the result, initialized to the input
34138 operand to ease control flow. */
34139 res = gen_reg_rtx (mode);
34140 emit_move_insn (res, operand1);
34142 TWO52 = ix86_gen_TWO52 (mode);
34143 xa = ix86_expand_sse_fabs (res, &mask);
34144 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34146 /* load nextafter (0.5, 0.0) */
34147 fmt = REAL_MODE_FORMAT (mode);
34148 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
34149 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
34151 /* xa = xa + 0.5 */
34152 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
34153 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
34155 /* xa = (double)(int64_t)xa */
34156 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
34157 expand_fix (xi, xa, 0);
34158 expand_float (xa, xi, 0);
34160 /* res = copysign (xa, operand1) */
34161 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
34163 emit_label (label);
34164 LABEL_NUSES (label) = 1;
34166 emit_move_insn (operand0, res);
34169 /* Expand SSE sequence for computing round
34170 from OP1 storing into OP0 using sse4 round insn. */
34172 ix86_expand_round_sse4 (rtx op0, rtx op1)
34174 enum machine_mode mode = GET_MODE (op0);
34175 rtx e1, e2, res, half;
34176 const struct real_format *fmt;
34177 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
34178 rtx (*gen_copysign) (rtx, rtx, rtx);
34179 rtx (*gen_round) (rtx, rtx, rtx);
34184 gen_copysign = gen_copysignsf3;
34185 gen_round = gen_sse4_1_roundsf2;
34188 gen_copysign = gen_copysigndf3;
34189 gen_round = gen_sse4_1_rounddf2;
34192 gcc_unreachable ();
34195 /* round (a) = trunc (a + copysign (0.5, a)) */
34197 /* load nextafter (0.5, 0.0) */
34198 fmt = REAL_MODE_FORMAT (mode);
34199 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
34200 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
34201 half = const_double_from_real_value (pred_half, mode);
34203 /* e1 = copysign (0.5, op1) */
34204 e1 = gen_reg_rtx (mode);
34205 emit_insn (gen_copysign (e1, half, op1));
34207 /* e2 = op1 + e1 */
34208 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
34210 /* res = trunc (e2) */
34211 res = gen_reg_rtx (mode);
34212 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
34214 emit_move_insn (op0, res);
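/* Editor's note (rationale, not stated in the original source): the
   nearest-integer mode of the SSE4.1 round instruction ties to even,
   while round () must round ties away from zero; hence the
   add-half-then-ROUND_TRUNC sequence, with the same nextafter
   adjustment as ix86_expand_lround to avoid double rounding.  */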
34218 /* Table of valid machine attributes. */
34219 static const struct attribute_spec ix86_attribute_table[] =
34221 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
34222 affects_type_identity } */
34223 /* Stdcall attribute says callee is responsible for popping arguments
34224 if they are not variable. */
34225 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34227 /* Fastcall attribute says callee is responsible for popping arguments
34228 if they are not variable. */
34229 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34231 /* Thiscall attribute says callee is responsible for popping arguments
34232 if they are not variable. */
34233 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34235 /* Cdecl attribute says the callee is a normal C declaration */
34236 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34238 /* Regparm attribute specifies how many integer arguments are to be
34239 passed in registers. */
34240 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
34242 /* Sseregparm attribute says we are using x86_64 calling conventions
34243 for FP arguments. */
34244 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34246 /* force_align_arg_pointer says this function realigns the stack at entry. */
34247 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
34248 false, true, true, ix86_handle_cconv_attribute, false },
34249 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34250 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
34251 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
34252 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
34255 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
34257 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
34259 #ifdef SUBTARGET_ATTRIBUTE_TABLE
34260 SUBTARGET_ATTRIBUTE_TABLE,
34262 /* ms_abi and sysv_abi calling convention function attributes. */
34263 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
34264 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
34265 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
34267 { "callee_pop_aggregate_return", 1, 1, false, true, true,
34268 ix86_handle_callee_pop_aggregate_return, true },
34270 { NULL, 0, 0, false, false, false, NULL, false }
34273 /* Implement targetm.vectorize.builtin_vectorization_cost. */
34275 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
34276 tree vectype ATTRIBUTE_UNUSED,
34277 int misalign ATTRIBUTE_UNUSED)
34279 switch (type_of_cost)
34282 return ix86_cost->scalar_stmt_cost;
34285 return ix86_cost->scalar_load_cost;
34288 return ix86_cost->scalar_store_cost;
34291 return ix86_cost->vec_stmt_cost;
34294 return ix86_cost->vec_align_load_cost;
34297 return ix86_cost->vec_store_cost;
34299 case vec_to_scalar:
34300 return ix86_cost->vec_to_scalar_cost;
34302 case scalar_to_vec:
34303 return ix86_cost->scalar_to_vec_cost;
34305 case unaligned_load:
34306 case unaligned_store:
34307 return ix86_cost->vec_unalign_load_cost;
34309 case cond_branch_taken:
34310 return ix86_cost->cond_taken_branch_cost;
34312 case cond_branch_not_taken:
34313 return ix86_cost->cond_not_taken_branch_cost;
34319 gcc_unreachable ();
34324 /* Implement targetm.vectorize.builtin_vec_perm. */
34327 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
34329 tree itype = TREE_TYPE (vec_type);
34330 bool u = TYPE_UNSIGNED (itype);
34331 enum machine_mode vmode = TYPE_MODE (vec_type);
34332 enum ix86_builtins fcode;
34333 bool ok = TARGET_SSE2;
34339 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
34342 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
34344 itype = ix86_get_builtin_type (IX86_BT_DI);
34349 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
34353 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
34355 itype = ix86_get_builtin_type (IX86_BT_SI);
34359 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
34362 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
34365 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
34368 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
34378 *mask_type = itype;
34379 return ix86_builtins[(int) fcode];
34382 /* Return a vector mode with twice as many elements as VMODE. */
34383 /* ??? Consider moving this to a table generated by genmodes.c. */
34385 static enum machine_mode
34386 doublesize_vector_mode (enum machine_mode vmode)
34390 case V2SFmode: return V4SFmode;
34391 case V1DImode: return V2DImode;
34392 case V2SImode: return V4SImode;
34393 case V4HImode: return V8HImode;
34394 case V8QImode: return V16QImode;
34396 case V2DFmode: return V4DFmode;
34397 case V4SFmode: return V8SFmode;
34398 case V2DImode: return V4DImode;
34399 case V4SImode: return V8SImode;
34400 case V8HImode: return V16HImode;
34401 case V16QImode: return V32QImode;
34403 case V4DFmode: return V8DFmode;
34404 case V8SFmode: return V16SFmode;
34405 case V4DImode: return V8DImode;
34406 case V8SImode: return V16SImode;
34407 case V16HImode: return V32HImode;
34408 case V32QImode: return V64QImode;
34411 gcc_unreachable ();
34415 /* Construct (set target (vec_select op0 (parallel perm))) and
34416 return true if that's a valid instruction in the active ISA. */
34419 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
34421 rtx rperm[MAX_VECT_LEN], x;
34424 for (i = 0; i < nelt; ++i)
34425 rperm[i] = GEN_INT (perm[i]);
34427 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
34428 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
34429 x = gen_rtx_SET (VOIDmode, target, x);
34432 if (recog_memoized (x) < 0)
34440 /* Similar, but generate a vec_concat from op0 and op1 as well. */
34443 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
34444 const unsigned char *perm, unsigned nelt)
34446 enum machine_mode v2mode;
34449 v2mode = doublesize_vector_mode (GET_MODE (op0));
34450 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
34451 return expand_vselect (target, x, perm, nelt);
34454 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34455 in terms of blendp[sd] / pblendw / pblendvb. */
34458 expand_vec_perm_blend (struct expand_vec_perm_d *d)
34460 enum machine_mode vmode = d->vmode;
34461 unsigned i, mask, nelt = d->nelt;
34462 rtx target, op0, op1, x;
34464 if (!TARGET_SSE4_1 || d->op0 == d->op1)
34466 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
34469 /* This is a blend, not a permute. Elements must stay in their
34470 respective lanes. */
34471 for (i = 0; i < nelt; ++i)
34473 unsigned e = d->perm[i];
34474 if (!(e == i || e == i + nelt))
34481 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
34482 decision should be extracted elsewhere, so that we only try that
34483 sequence once all budget==3 options have been tried. */
34485 /* For bytes, see if bytes move in pairs so we can use pblendw with
34486 an immediate argument, rather than pblendvb with a vector argument. */
34487 if (vmode == V16QImode)
34489 bool pblendw_ok = true;
34490 for (i = 0; i < 16 && pblendw_ok; i += 2)
34491 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
34495 rtx rperm[16], vperm;
34497 for (i = 0; i < nelt; ++i)
34498 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
34500 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
34501 vperm = force_reg (V16QImode, vperm);
34503 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
34508 target = d->target;
34520 for (i = 0; i < nelt; ++i)
34521 mask |= (d->perm[i] >= nelt) << i;
34525 for (i = 0; i < 2; ++i)
34526 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
34530 for (i = 0; i < 4; ++i)
34531 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
34535 for (i = 0; i < 8; ++i)
34536 mask |= (d->perm[i * 2] >= 16) << i;
34540 target = gen_lowpart (vmode, target);
34541 op0 = gen_lowpart (vmode, op0);
34542 op1 = gen_lowpart (vmode, op1);
34546 gcc_unreachable ();
34549 /* This matches five different patterns with the different modes. */
34550 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
34551 x = gen_rtx_SET (VOIDmode, target, x);
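/* Editor's worked example (not from the original source): for V4SFmode
   with perm = { 0, 5, 2, 7 }, elements 1 and 3 come from op1, so the
   generic loop above builds mask = (1 << 1) | (1 << 3) == 0xa, which is
   exactly the immediate blendps expects.  */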
34557 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34558 in terms of the variable form of vpermilps.
34560 Note that we will have already failed the immediate input vpermilps,
34561 which requires that the high and low part shuffle be identical; the
34562 variable form doesn't require that. */
34565 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
34567 rtx rperm[8], vperm;
34570 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
34573 /* We can only permute within the 128-bit lane. */
34574 for (i = 0; i < 8; ++i)
34576 unsigned e = d->perm[i];
34577 if (i < 4 ? e >= 4 : e < 4)
34584 for (i = 0; i < 8; ++i)
34586 unsigned e = d->perm[i];
34588 /* Within each 128-bit lane, the elements of op0 are numbered
34589 from 0 and the elements of op1 are numbered from 4. */
34595 rperm[i] = GEN_INT (e);
34598 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
34599 vperm = force_reg (V8SImode, vperm);
34600 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
34605 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34606 in terms of pshufb or vpperm. */
34609 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
34611 unsigned i, nelt, eltsz;
34612 rtx rperm[16], vperm, target, op0, op1;
34614 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
34616 if (GET_MODE_SIZE (d->vmode) != 16)
34623 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
34625 for (i = 0; i < nelt; ++i)
34627 unsigned j, e = d->perm[i];
34628 for (j = 0; j < eltsz; ++j)
34629 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
34632 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
34633 vperm = force_reg (V16QImode, vperm);
34635 target = gen_lowpart (V16QImode, d->target);
34636 op0 = gen_lowpart (V16QImode, d->op0);
34637 if (d->op0 == d->op1)
34638 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
34641 op1 = gen_lowpart (V16QImode, d->op1);
34642 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
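/* Editor's worked example (not from the original source): the loop above
   scales each element index to byte indices.  For V4SImode (eltsz == 4)
   with perm[1] == 2, control bytes 4..7 become 8, 9, 10, 11, selecting
   the four bytes of dword 2.  */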
34648 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
34649 in a single instruction. */
34652 expand_vec_perm_1 (struct expand_vec_perm_d *d)
34654 unsigned i, nelt = d->nelt;
34655 unsigned char perm2[MAX_VECT_LEN];
34657 /* Check plain VEC_SELECT first, because AVX has instructions that could
34658 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
34659 input where SEL+CONCAT may not. */
34660 if (d->op0 == d->op1)
34662 int mask = nelt - 1;
34664 for (i = 0; i < nelt; i++)
34665 perm2[i] = d->perm[i] & mask;
34667 if (expand_vselect (d->target, d->op0, perm2, nelt))
34670 /* There are plenty of patterns in sse.md that are written for
34671 SEL+CONCAT and are not replicated for a single op. Perhaps
34672 that should be changed, to avoid the nastiness here. */
34674 /* Recognize interleave style patterns, which means incrementing
34675 every other permutation operand. */
34676 for (i = 0; i < nelt; i += 2)
34678 perm2[i] = d->perm[i] & mask;
34679 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
34681 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
34684 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
34687 for (i = 0; i < nelt; i += 4)
34689 perm2[i + 0] = d->perm[i + 0] & mask;
34690 perm2[i + 1] = d->perm[i + 1] & mask;
34691 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
34692 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
34695 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
34700 /* Finally, try the fully general two operand permute. */
34701 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
34704 /* Recognize interleave style patterns with reversed operands. */
34705 if (d->op0 != d->op1)
34707 for (i = 0; i < nelt; ++i)
34709 unsigned e = d->perm[i];
34717 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
34721 /* Try the SSE4.1 blend variable merge instructions. */
34722 if (expand_vec_perm_blend (d))
34725 /* Try one of the AVX vpermil variable permutations. */
34726 if (expand_vec_perm_vpermil (d))
34729 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
34730 if (expand_vec_perm_pshufb (d))
34736 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34737 in terms of a pair of pshuflw + pshufhw instructions. */
34740 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
34742 unsigned char perm2[MAX_VECT_LEN];
34746 if (d->vmode != V8HImode || d->op0 != d->op1)
34749 /* The two permutations only operate in 64-bit lanes. */
34750 for (i = 0; i < 4; ++i)
34751 if (d->perm[i] >= 4)
34753 for (i = 4; i < 8; ++i)
34754 if (d->perm[i] < 4)
34760 /* Emit the pshuflw. */
34761 memcpy (perm2, d->perm, 4);
34762 for (i = 4; i < 8; ++i)
34764 ok = expand_vselect (d->target, d->op0, perm2, 8);
34767 /* Emit the pshufhw. */
34768 memcpy (perm2 + 4, d->perm + 4, 4);
34769 for (i = 0; i < 4; ++i)
34771 ok = expand_vselect (d->target, d->target, perm2, 8);
34777 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
34778 the permutation using the SSSE3 palignr instruction. This succeeds
34779 when all of the elements in PERM fit within one vector and we merely
34780 need to shift them down so that a single vector permutation has a
34781 chance to succeed. */
34784 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
34786 unsigned i, nelt = d->nelt;
34791 /* Even with AVX, palignr only operates on 128-bit vectors. */
34792 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
34795 min = nelt, max = 0;
34796 for (i = 0; i < nelt; ++i)
34798 unsigned e = d->perm[i];
34804 if (min == 0 || max - min >= nelt)
34807 /* Given that we have SSSE3, we know we'll be able to implement the
34808 single operand permutation after the palignr with pshufb. */
34812 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
34813 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
34814 gen_lowpart (TImode, d->op1),
34815 gen_lowpart (TImode, d->op0), shift));
34817 d->op0 = d->op1 = d->target;
34820 for (i = 0; i < nelt; ++i)
34822 unsigned e = d->perm[i] - min;
34828 /* Test for the degenerate case where the alignment by itself
34829 produces the desired permutation. */
34833 ok = expand_vec_perm_1 (d);
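/* Editor's worked example (not from the original source): for V8HImode
   with perm = { 3, 4, 5, 6, 7, 8, 9, 10 }, min == 3 and max - min < nelt,
   so the palignr above shifts the op1:op0 pair down by 3 elements and the
   residual permutation becomes the identity { 0 ... 7 } -- the degenerate
   case mentioned above.  */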
34839 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
34840 a two vector permutation into a single vector permutation by using
34841 an interleave operation to merge the vectors. */
34844 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
34846 struct expand_vec_perm_d dremap, dfinal;
34847 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
34848 unsigned contents, h1, h2, h3, h4;
34849 unsigned char remap[2 * MAX_VECT_LEN];
34853 if (d->op0 == d->op1)
34856 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
34857 lanes. We can use similar techniques with the vperm2f128 instruction,
34858 but it requires slightly different logic. */
34859 if (GET_MODE_SIZE (d->vmode) != 16)
34862 /* Examine from whence the elements come. */
34864 for (i = 0; i < nelt; ++i)
34865 contents |= 1u << d->perm[i];
34867 /* Split the two input vectors into 4 halves. */
34868 h1 = (1u << nelt2) - 1;
34873 memset (remap, 0xff, sizeof (remap));
  /* If the elements are all from the low halves, use interleave low;
     similarly for interleave high.  If the elements are from mismatched
     halves, we can use shufps for V4SF/V4SI or do a DImode shuffle.  */
34879 if ((contents & (h1 | h3)) == contents)
34881 for (i = 0; i < nelt2; ++i)
34884 remap[i + nelt] = i * 2 + 1;
34885 dremap.perm[i * 2] = i;
34886 dremap.perm[i * 2 + 1] = i + nelt;
34889 else if ((contents & (h2 | h4)) == contents)
34891 for (i = 0; i < nelt2; ++i)
34893 remap[i + nelt2] = i * 2;
34894 remap[i + nelt + nelt2] = i * 2 + 1;
34895 dremap.perm[i * 2] = i + nelt2;
34896 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
34899 else if ((contents & (h1 | h4)) == contents)
34901 for (i = 0; i < nelt2; ++i)
34904 remap[i + nelt + nelt2] = i + nelt2;
34905 dremap.perm[i] = i;
34906 dremap.perm[i + nelt2] = i + nelt + nelt2;
34910 dremap.vmode = V2DImode;
34912 dremap.perm[0] = 0;
34913 dremap.perm[1] = 3;
34916 else if ((contents & (h2 | h3)) == contents)
34918 for (i = 0; i < nelt2; ++i)
34920 remap[i + nelt2] = i;
34921 remap[i + nelt] = i + nelt2;
34922 dremap.perm[i] = i + nelt2;
34923 dremap.perm[i + nelt2] = i + nelt;
34927 dremap.vmode = V2DImode;
34929 dremap.perm[0] = 1;
34930 dremap.perm[1] = 2;
34936 /* Use the remapping array set up above to move the elements from their
34937 swizzled locations into their final destinations. */
34939 for (i = 0; i < nelt; ++i)
34941 unsigned e = remap[d->perm[i]];
34942 gcc_assert (e < nelt);
34943 dfinal.perm[i] = e;
34945 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
34946 dfinal.op1 = dfinal.op0;
34947 dremap.target = dfinal.op0;
34949 /* Test if the final remap can be done with a single insn. For V4SFmode or
34950 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
34952 ok = expand_vec_perm_1 (&dfinal);
34953 seq = get_insns ();
34959 if (dremap.vmode != dfinal.vmode)
34961 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
34962 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
34963 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
34966 ok = expand_vec_perm_1 (&dremap);
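/* Editor's worked example (not from the original source): for V4SImode
   with perm = { 0, 5, 1, 4 }, contents == 0x33 == (h1 | h3), so dremap
   becomes the interleave-low { 0, 4, 1, 5 } and the final single-operand
   shuffle computed through remap[] is { 0, 3, 2, 1 }, which
   expand_vec_perm_1 matches as a pshufd.  */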
34973 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
34974 permutation with two pshufb insns and an ior. We should have already
   failed all two-instruction sequences.  */
34978 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
34980 rtx rperm[2][16], vperm, l, h, op, m128;
34981 unsigned int i, nelt, eltsz;
34983 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
34985 gcc_assert (d->op0 != d->op1);
34988 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
34990 /* Generate two permutation masks. If the required element is within
34991 the given vector it is shuffled into the proper lane. If the required
34992 element is in the other vector, force a zero into the lane by setting
34993 bit 7 in the permutation mask. */
34994 m128 = GEN_INT (-128);
34995 for (i = 0; i < nelt; ++i)
34997 unsigned j, e = d->perm[i];
34998 unsigned which = (e >= nelt);
35002 for (j = 0; j < eltsz; ++j)
35004 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
35005 rperm[1-which][i*eltsz + j] = m128;
35009 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
35010 vperm = force_reg (V16QImode, vperm);
35012 l = gen_reg_rtx (V16QImode);
35013 op = gen_lowpart (V16QImode, d->op0);
35014 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
35016 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
35017 vperm = force_reg (V16QImode, vperm);
35019 h = gen_reg_rtx (V16QImode);
35020 op = gen_lowpart (V16QImode, d->op1);
35021 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
35023 op = gen_lowpart (V16QImode, d->target);
35024 emit_insn (gen_iorv16qi3 (op, l, h));
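/* Editor's note (illustration, not from the original source): pshufb
   zeroes any result byte whose control byte has bit 7 set, so each of
   the two shuffles above contributes only the bytes owned by its own
   source operand, and the final por simply merges the two halves.  */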
35029 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
35030 and extract-odd permutations. */
35033 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
35040 t1 = gen_reg_rtx (V4DFmode);
35041 t2 = gen_reg_rtx (V4DFmode);
35043 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
35044 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
35045 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
35047 /* Now an unpck[lh]pd will produce the result required. */
35049 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
35051 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
35057 int mask = odd ? 0xdd : 0x88;
35059 t1 = gen_reg_rtx (V8SFmode);
35060 t2 = gen_reg_rtx (V8SFmode);
35061 t3 = gen_reg_rtx (V8SFmode);
35063 /* Shuffle within the 128-bit lanes to produce:
35064 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
35065 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
35068 /* Shuffle the lanes around to produce:
35069 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
35070 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
35073 /* Shuffle within the 128-bit lanes to produce:
35074 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
35075 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
35077 /* Shuffle within the 128-bit lanes to produce:
35078 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
35079 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
35081 /* Shuffle the lanes around to produce:
35082 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
35083 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
35092 /* These are always directly implementable by expand_vec_perm_1. */
35093 gcc_unreachable ();
35097 return expand_vec_perm_pshufb2 (d);
35100 /* We need 2*log2(N)-1 operations to achieve odd/even
35101 with interleave. */
35102 t1 = gen_reg_rtx (V8HImode);
35103 t2 = gen_reg_rtx (V8HImode);
35104 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
35105 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
35106 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
35107 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
35109 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
35111 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
35118 return expand_vec_perm_pshufb2 (d);
35121 t1 = gen_reg_rtx (V16QImode);
35122 t2 = gen_reg_rtx (V16QImode);
35123 t3 = gen_reg_rtx (V16QImode);
35124 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
35125 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
35126 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
35127 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
35128 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
35129 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
35131 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
35133 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
35139 gcc_unreachable ();
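/* Editor's note (illustration, not from the original source): the
   0x88/0xdd immediates in the V8SF case encode two-bit element
   selectors, low bits first: 0x88 picks elements { 0, 2 } of each
   source (the evens of each 128-bit lane) and 0xdd picks { 1, 3 }
   (the odds).  */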
35145 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
35146 extract-even and extract-odd permutations. */
35149 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
35151 unsigned i, odd, nelt = d->nelt;
35154 if (odd != 0 && odd != 1)
35157 for (i = 1; i < nelt; ++i)
35158 if (d->perm[i] != 2 * i + odd)
35161 return expand_vec_perm_even_odd_1 (d, odd);
35164 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
35165 permutations. We assume that expand_vec_perm_1 has already failed. */
35168 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
35170 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
35171 enum machine_mode vmode = d->vmode;
35172 unsigned char perm2[4];
35180 /* These are special-cased in sse.md so that we can optionally
35181 use the vbroadcast instruction. They expand to two insns
35182 if the input happens to be in a register. */
35183 gcc_unreachable ();
35189 /* These are always implementable using standard shuffle patterns. */
35190 gcc_unreachable ();
35194 /* These can be implemented via interleave. We save one insn by
35195 stopping once we have promoted to V4SImode and then use pshufd. */
35198 optab otab = vec_interleave_low_optab;
35202 otab = vec_interleave_high_optab;
35207 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
35208 vmode = get_mode_wider_vector (vmode);
35209 op0 = gen_lowpart (vmode, op0);
35211 while (vmode != V4SImode);
35213 memset (perm2, elt, 4);
35214 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
35219 gcc_unreachable ();
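/* Editor's worked example (not from the original source): to broadcast
   one byte of a V16QImode register, each punpck of the register with
   itself doubles the width of the broadcast element; after two steps the
   byte value fills a V4SImode element, and a single pshufd with all four
   selectors equal replicates it across the vector -- one insn fewer than
   interleaving all the way up.  */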
35223 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
35224 broadcast permutations. */
35227 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
35229 unsigned i, elt, nelt = d->nelt;
35231 if (d->op0 != d->op1)
35235 for (i = 1; i < nelt; ++i)
35236 if (d->perm[i] != elt)
35239 return expand_vec_perm_broadcast_1 (d);
35242 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
35243 With all of the interface bits taken care of, perform the expansion
35244 in D and return true on success. */
35247 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
35249 /* Try a single instruction expansion. */
35250 if (expand_vec_perm_1 (d))
35253 /* Try sequences of two instructions. */
35255 if (expand_vec_perm_pshuflw_pshufhw (d))
35258 if (expand_vec_perm_palignr (d))
35261 if (expand_vec_perm_interleave2 (d))
35264 if (expand_vec_perm_broadcast (d))
35267 /* Try sequences of three instructions. */
35269 if (expand_vec_perm_pshufb2 (d))
35272 /* ??? Look for narrow permutations whose element orderings would
35273 allow the promotion to a wider mode. */
35275 /* ??? Look for sequences of interleave or a wider permute that place
35276 the data into the correct lanes for a half-vector shuffle like
35277 pshuf[lh]w or vpermilps. */
35279 /* ??? Look for sequences of interleave that produce the desired results.
35280 The combinatorics of punpck[lh] get pretty ugly... */
35282 if (expand_vec_perm_even_odd (d))
35288 /* Extract the values from the vector CST into the permutation array in D.
35289 Return 0 on error, 1 if all values from the permutation come from the
35290 first vector, 2 if all values from the second vector, and 3 otherwise. */
35293 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
35295 tree list = TREE_VECTOR_CST_ELTS (cst);
35296 unsigned i, nelt = d->nelt;
35299 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
35301 unsigned HOST_WIDE_INT e;
35303 if (!host_integerp (TREE_VALUE (list), 1))
35305 e = tree_low_cst (TREE_VALUE (list), 1);
35309 ret |= (e < nelt ? 1 : 2);
35312 gcc_assert (list == NULL);
  /* For all elements from the second vector, fold the elements to the
     first.  */
35316 for (i = 0; i < nelt; ++i)
35317 d->perm[i] -= nelt;
35323 ix86_expand_vec_perm_builtin (tree exp)
35325 struct expand_vec_perm_d d;
35326 tree arg0, arg1, arg2;
35328 arg0 = CALL_EXPR_ARG (exp, 0);
35329 arg1 = CALL_EXPR_ARG (exp, 1);
35330 arg2 = CALL_EXPR_ARG (exp, 2);
35332 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
35333 d.nelt = GET_MODE_NUNITS (d.vmode);
35334 d.testing_p = false;
35335 gcc_assert (VECTOR_MODE_P (d.vmode));
35337 if (TREE_CODE (arg2) != VECTOR_CST)
35339 error_at (EXPR_LOCATION (exp),
35340 "vector permutation requires vector constant");
35344 switch (extract_vec_perm_cst (&d, arg2))
35350 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
35354 if (!operand_equal_p (arg0, arg1, 0))
35356 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
35357 d.op0 = force_reg (d.vmode, d.op0);
35358 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
35359 d.op1 = force_reg (d.vmode, d.op1);
35363 /* The elements of PERM do not suggest that only the first operand
35364 is used, but both operands are identical. Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
35368 unsigned i, nelt = d.nelt;
35369 for (i = 0; i < nelt; ++i)
35370 if (d.perm[i] >= nelt)
35376 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
35377 d.op0 = force_reg (d.vmode, d.op0);
35382 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
35383 d.op0 = force_reg (d.vmode, d.op0);
35388 d.target = gen_reg_rtx (d.vmode);
35389 if (ix86_expand_vec_perm_builtin_1 (&d))
  /* For compiler-generated permutations, we should never get here, because
     the compiler should also be checking the ok hook.  But since this is a
     builtin the user has access to, don't abort.  */
35398 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
35401 sorry ("vector permutation (%d %d %d %d)",
35402 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
35405 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
35406 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
35407 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
35410 sorry ("vector permutation "
35411 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
35412 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
35413 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
35414 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
35415 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
35418 gcc_unreachable ();
35421 return CONST0_RTX (d.vmode);
35424 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
35427 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
35429 struct expand_vec_perm_d d;
35433 d.vmode = TYPE_MODE (vec_type);
35434 d.nelt = GET_MODE_NUNITS (d.vmode);
35435 d.testing_p = true;
35437 /* Given sufficient ISA support we can just return true here
35438 for selected vector modes. */
35439 if (GET_MODE_SIZE (d.vmode) == 16)
35441 /* All implementable with a single vpperm insn. */
35444 /* All implementable with 2 pshufb + 1 ior. */
35447 /* All implementable with shufpd or unpck[lh]pd. */
35452 vec_mask = extract_vec_perm_cst (&d, mask);
35454 /* Check whether the mask can be applied to the vector type. */
35455 if (vec_mask < 0 || vec_mask > 3)
35458 one_vec = (vec_mask != 3);
35460 /* Implementable with shufps or pshufd. */
35461 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
35464 /* Otherwise we have to go through the motions and see if we can
35465 figure out how to generate the requested permutation. */
35466 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
35467 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
35469 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
35472 ret = ix86_expand_vec_perm_builtin_1 (&d);
35479 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
35481 struct expand_vec_perm_d d;
35487 d.vmode = GET_MODE (targ);
35488 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
35489 d.testing_p = false;
35491 for (i = 0; i < nelt; ++i)
35492 d.perm[i] = i * 2 + odd;
35494 /* We'll either be able to implement the permutation directly... */
35495 if (expand_vec_perm_1 (&d))
35498 /* ... or we use the special-case patterns. */
35499 expand_vec_perm_even_odd_1 (&d, odd);
35502 /* Expand an insert into a vector register through pinsr insn.
35503 Return true if successful. */
35506 ix86_expand_pinsr (rtx *operands)
35508 rtx dst = operands[0];
35509 rtx src = operands[3];
35511 unsigned int size = INTVAL (operands[1]);
35512 unsigned int pos = INTVAL (operands[2]);
35514 if (GET_CODE (dst) == SUBREG)
35516 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
35517 dst = SUBREG_REG (dst);
35520 if (GET_CODE (src) == SUBREG)
35521 src = SUBREG_REG (src);
35523 switch (GET_MODE (dst))
35530 enum machine_mode srcmode, dstmode;
35531 rtx (*pinsr)(rtx, rtx, rtx, rtx);
35533 srcmode = mode_for_size (size, MODE_INT, 0);
35538 if (!TARGET_SSE4_1)
35540 dstmode = V16QImode;
35541 pinsr = gen_sse4_1_pinsrb;
35547 dstmode = V8HImode;
35548 pinsr = gen_sse2_pinsrw;
35552 if (!TARGET_SSE4_1)
35554 dstmode = V4SImode;
35555 pinsr = gen_sse4_1_pinsrd;
35559 gcc_assert (TARGET_64BIT);
35560 if (!TARGET_SSE4_1)
35562 dstmode = V2DImode;
35563 pinsr = gen_sse4_1_pinsrq;
35570 dst = gen_lowpart (dstmode, dst);
35571 src = gen_lowpart (srcmode, src);
35575 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
/* This function returns the calling ABI specific va_list type node.
   It returns the FNDECL specific va_list type.  */
35588 ix86_fn_abi_va_list (tree fndecl)
35591 return va_list_type_node;
35592 gcc_assert (fndecl != NULL_TREE);
35594 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
35595 return ms_va_list_type_node;
35597 return sysv_va_list_type_node;
35600 /* Returns the canonical va_list type specified by TYPE. If there
   is no valid TYPE provided, it returns NULL_TREE.  */
35604 ix86_canonical_va_list_type (tree type)
35608 /* Resolve references and pointers to va_list type. */
35609 if (TREE_CODE (type) == MEM_REF)
35610 type = TREE_TYPE (type);
35611 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
35612 type = TREE_TYPE (type);
35613 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
35614 type = TREE_TYPE (type);
35616 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
35618 wtype = va_list_type_node;
35619 gcc_assert (wtype != NULL_TREE);
35621 if (TREE_CODE (wtype) == ARRAY_TYPE)
35623 /* If va_list is an array type, the argument may have decayed
35624 to a pointer type, e.g. by being passed to another function.
35625 In that case, unwrap both types so that we can compare the
35626 underlying records. */
35627 if (TREE_CODE (htype) == ARRAY_TYPE
35628 || POINTER_TYPE_P (htype))
35630 wtype = TREE_TYPE (wtype);
35631 htype = TREE_TYPE (htype);
35634 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
35635 return va_list_type_node;
35636 wtype = sysv_va_list_type_node;
35637 gcc_assert (wtype != NULL_TREE);
35639 if (TREE_CODE (wtype) == ARRAY_TYPE)
35641 /* If va_list is an array type, the argument may have decayed
35642 to a pointer type, e.g. by being passed to another function.
35643 In that case, unwrap both types so that we can compare the
35644 underlying records. */
35645 if (TREE_CODE (htype) == ARRAY_TYPE
35646 || POINTER_TYPE_P (htype))
35648 wtype = TREE_TYPE (wtype);
35649 htype = TREE_TYPE (htype);
35652 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
35653 return sysv_va_list_type_node;
35654 wtype = ms_va_list_type_node;
35655 gcc_assert (wtype != NULL_TREE);
35657 if (TREE_CODE (wtype) == ARRAY_TYPE)
35659 /* If va_list is an array type, the argument may have decayed
35660 to a pointer type, e.g. by being passed to another function.
35661 In that case, unwrap both types so that we can compare the
35662 underlying records. */
35663 if (TREE_CODE (htype) == ARRAY_TYPE
35664 || POINTER_TYPE_P (htype))
35666 wtype = TREE_TYPE (wtype);
35667 htype = TREE_TYPE (htype);
35670 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
35671 return ms_va_list_type_node;
35674 return std_canonical_va_list_type (type);
35677 /* Iterate through the target-specific builtin types for va_list.
35678 IDX denotes the iterator, *PTREE is set to the result type of
35679 the va_list builtin, and *PNAME to its internal type.
35680 Returns zero if there is no element for this index, otherwise
35681 IDX should be increased upon the next call.
35682 Note, do not iterate a base builtin's name like __builtin_va_list.
35683 Used from c_common_nodes_and_builtins. */
35686 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
35696 *ptree = ms_va_list_type_node;
35697 *pname = "__builtin_ms_va_list";
35701 *ptree = sysv_va_list_type_node;
35702 *pname = "__builtin_sysv_va_list";
35710 #undef TARGET_SCHED_DISPATCH
35711 #define TARGET_SCHED_DISPATCH has_dispatch
35712 #undef TARGET_SCHED_DISPATCH_DO
35713 #define TARGET_SCHED_DISPATCH_DO do_dispatch
35714 #undef TARGET_SCHED_REASSOCIATION_WIDTH
35715 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
35717 /* The size of the dispatch window is the total number of bytes of
35718 object code allowed in a window. */
35719 #define DISPATCH_WINDOW_SIZE 16
35721 /* Number of dispatch windows considered for scheduling. */
35722 #define MAX_DISPATCH_WINDOWS 3
35724 /* Maximum number of instructions in a window. */
35727 /* Maximum number of immediate operands in a window. */
35730 /* Maximum number of immediate bits allowed in a window. */
35731 #define MAX_IMM_SIZE 128
35733 /* Maximum number of 32 bit immediates allowed in a window. */
35734 #define MAX_IMM_32 4
35736 /* Maximum number of 64 bit immediates allowed in a window. */
35737 #define MAX_IMM_64 2
35739 /* Maximum total of loads or prefetches allowed in a window. */
35742 /* Maximum total of stores allowed in a window. */
35743 #define MAX_STORE 1
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
35750 enum dispatch_group {
35765 /* Number of allowable groups in a dispatch window. It is an array
35766 indexed by dispatch_group enum. 100 is used as a big number,
   because the number of these kinds of operations does not have any
   effect in the dispatch window, but we need them for other reasons in
   the table.  */
35770 static unsigned int num_allowable_groups[disp_last] = {
35771 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
35774 char group_name[disp_last + 1][16] = {
35775 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
35776 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
35777 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
35780 /* Instruction path. */
35783 path_single, /* Single micro op. */
35784 path_double, /* Double micro op. */
  path_multi,	/* Instructions with more than 2 micro ops.  */
35789 /* sched_insn_info defines a window to the instructions scheduled in
35790 the basic block. It contains a pointer to the insn_info table and
35791 the instruction scheduled.
   Windows are allocated for each basic block and are linked
   with each other.  */
35795 typedef struct sched_insn_info_s {
35797 enum dispatch_group group;
35798 enum insn_path path;
35803 /* Linked list of dispatch windows. This is a two way list of
35804 dispatch windows of a basic block. It contains information about
35805 the number of uops in the window and the total number of
35806 instructions and of bytes in the object code for this dispatch
35808 typedef struct dispatch_windows_s {
35809 int num_insn; /* Number of insn in the window. */
35810 int num_uops; /* Number of uops in the window. */
35811 int window_size; /* Number of bytes in the window. */
  int window_num;	/* Window number, 0 or 1.  */
35813 int num_imm; /* Number of immediates in an insn. */
35814 int num_imm_32; /* Number of 32 bit immediates in an insn. */
35815 int num_imm_64; /* Number of 64 bit immediates in an insn. */
35816 int imm_size; /* Total immediates in the window. */
35817 int num_loads; /* Total memory loads in the window. */
35818 int num_stores; /* Total memory stores in the window. */
35819 int violation; /* Violation exists in window. */
35820 sched_insn_info *window; /* Pointer to the window. */
35821 struct dispatch_windows_s *next;
35822 struct dispatch_windows_s *prev;
35823 } dispatch_windows;
/* Immediate values used in an insn.  */
35826 typedef struct imm_info_s
35833 static dispatch_windows *dispatch_window_list;
35834 static dispatch_windows *dispatch_window_list1;
35836 /* Get dispatch group of insn. */
35838 static enum dispatch_group
35839 get_mem_group (rtx insn)
35841 enum attr_memory memory;
35843 if (INSN_CODE (insn) < 0)
35844 return disp_no_group;
35845 memory = get_attr_memory (insn);
35846 if (memory == MEMORY_STORE)
35849 if (memory == MEMORY_LOAD)
35852 if (memory == MEMORY_BOTH)
35853 return disp_load_store;
35855 return disp_no_group;
35858 /* Return true if insn is a compare instruction. */
35863 enum attr_type type;
35865 type = get_attr_type (insn);
35866 return (type == TYPE_TEST
35867 || type == TYPE_ICMP
35868 || type == TYPE_FCMP
35869 || GET_CODE (PATTERN (insn)) == COMPARE);
/* Return true if a dispatch violation was encountered.  */
35875 dispatch_violation (void)
35877 if (dispatch_window_list->next)
35878 return dispatch_window_list->next->violation;
35879 return dispatch_window_list->violation;
35882 /* Return true if insn is a branch instruction. */
35885 is_branch (rtx insn)
35887 return (CALL_P (insn) || JUMP_P (insn));
35890 /* Return true if insn is a prefetch instruction. */
35893 is_prefetch (rtx insn)
35895 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
35898 /* This function initializes a dispatch window and the list container holding a
35899 pointer to the window. */
35902 init_window (int window_num)
35905 dispatch_windows *new_list;
35907 if (window_num == 0)
35908 new_list = dispatch_window_list;
35910 new_list = dispatch_window_list1;
35912 new_list->num_insn = 0;
35913 new_list->num_uops = 0;
35914 new_list->window_size = 0;
35915 new_list->next = NULL;
35916 new_list->prev = NULL;
35917 new_list->window_num = window_num;
35918 new_list->num_imm = 0;
35919 new_list->num_imm_32 = 0;
35920 new_list->num_imm_64 = 0;
35921 new_list->imm_size = 0;
35922 new_list->num_loads = 0;
35923 new_list->num_stores = 0;
35924 new_list->violation = false;
35926 for (i = 0; i < MAX_INSN; i++)
35928 new_list->window[i].insn = NULL;
35929 new_list->window[i].group = disp_no_group;
35930 new_list->window[i].path = no_path;
35931 new_list->window[i].byte_len = 0;
35932 new_list->window[i].imm_bytes = 0;
35937 /* This function allocates and initializes a dispatch window and the
35938 list container holding a pointer to the window. */
35940 static dispatch_windows *
35941 allocate_window (void)
35943 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
35944 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
35949 /* This routine initializes the dispatch scheduling information. It
35950 initiates building dispatch scheduler tables and constructs the
35951 first dispatch window. */
35953 static void
35954 init_dispatch_sched (void)
35955 {
35956 /* Allocate a dispatch list and a window. */
35957 dispatch_window_list = allocate_window ();
35958 dispatch_window_list1 = allocate_window ();
35959 init_window (0);
35960 init_window (1);
35961 }
35963 /* This function returns true if a branch is detected. End of a basic block
35964 does not have to be a branch, but here we assume only branches end a
35965 basic block. */
35967 static bool
35968 is_end_basic_block (enum dispatch_group group)
35969 {
35970 return group == disp_branch;
35971 }
35973 /* This function is called when the end of a window processing is reached. */
35975 static void
35976 process_end_window (void)
35977 {
35978 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
35979 if (dispatch_window_list->next)
35980 {
35981 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
35982 gcc_assert (dispatch_window_list->window_size
35983 + dispatch_window_list1->window_size <= 48);
35984 init_window (1);
35985 }
35986 init_window (0);
35987 }
35989 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
35990 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
35991 for 48 bytes of instructions. Note that these windows are not true
35992 dispatch windows of size DISPATCH_WINDOW_SIZE. */
35994 static dispatch_windows *
35995 allocate_next_window (int window_num)
35996 {
35997 if (window_num == 0)
35998 {
35999 if (dispatch_window_list->next)
36000 init_window (1);
36001 init_window (0);
36002 return dispatch_window_list;
36003 }
36005 dispatch_window_list->next = dispatch_window_list1;
36006 dispatch_window_list1->prev = dispatch_window_list;
36008 return dispatch_window_list1;
36009 }
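/* A sketch of the resulting structure: at most two windows are live at a
   time, chained through NEXT/PREV,

     dispatch_window_list (window 0) <-> dispatch_window_list1 (window 1)

   and dispatching restarts at window 0 after the pair is flushed.  */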
36011 /* Increment the number of immediate operands of an instruction. */
36013 static int
36014 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
36015 {
36016 if (*in_rtx == 0)
36017 return 0;
36019 switch ( GET_CODE (*in_rtx))
36020 {
36021 case CONST:
36022 case SYMBOL_REF:
36023 case CONST_INT:
36024 (imm_values->imm)++;
36025 if (x86_64_immediate_operand (*in_rtx, SImode))
36026 (imm_values->imm32)++;
36027 else
36028 (imm_values->imm64)++;
36029 break;
36031 case CONST_DOUBLE:
36032 (imm_values->imm)++;
36033 (imm_values->imm64)++;
36034 break;
36036 case CODE_LABEL:
36037 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
36038 {
36039 (imm_values->imm)++;
36040 (imm_values->imm32)++;
36041 }
36042 break;
36044 default:
36045 break;
36046 }
36048 return 0;
36049 }
36051 /* Compute number of immediate operands of an instruction. */
36053 static void
36054 find_constant (rtx in_rtx, imm_info *imm_values)
36055 {
36056 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
36057 (rtx_function) find_constant_1, (void *) imm_values);
36058 }
36060 /* Return total size of immediate operands of an instruction along with number
36061 of corresponding immediate-operands. It initializes its parameters to zero
36062 before calling FIND_CONSTANT.
36063 INSN is the input instruction. IMM is the total of immediates.
36064 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
36065 bit immediates. */
36067 static int
36068 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
36069 {
36070 imm_info imm_values = {0, 0, 0};
36072 find_constant (insn, &imm_values);
36073 *imm = imm_values.imm;
36074 *imm32 = imm_values.imm32;
36075 *imm64 = imm_values.imm64;
36076 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
36077 }
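/* A hypothetical caller (not part of this file), shown only to illustrate
   the contract above: the return value is the byte total, 4 bytes per
   32-bit immediate plus 8 bytes per 64-bit one, so an insn with one of
   each yields 4 + 8 = 12.  */
#if 0
static void
print_imm_summary (FILE *file, rtx insn)
{
  int imm, imm32, imm64;
  int size = get_num_immediates (insn, &imm, &imm32, &imm64);

  fprintf (file, "imm = %d (32-bit: %d, 64-bit: %d), %d bytes\n",
	   imm, imm32, imm64, size);
}
#endif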
36079 /* This function indicates if an operand of an instruction is an
36080 immediate. */
36082 static bool
36083 has_immediate (rtx insn)
36084 {
36085 int num_imm_operand;
36086 int num_imm32_operand;
36087 int num_imm64_operand;
36089 if (insn)
36090 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
36091 &num_imm64_operand);
36092 return false;
36093 }
36095 /* Return single or double path for instructions. */
36097 static enum insn_path
36098 get_insn_path (rtx insn)
36099 {
36100 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
36102 if ((int)path == 0)
36103 return path_single;
36105 if ((int)path == 1)
36106 return path_double;
36108 return path_multi;
36109 }
36111 /* Return insn dispatch group. */
36113 static enum dispatch_group
36114 get_insn_group (rtx insn)
36115 {
36116 enum dispatch_group group = get_mem_group (insn);
36117 if (group)
36118 return group;
36120 if (is_branch (insn))
36121 return disp_branch;
36123 if (is_cmp (insn))
36124 return disp_cmp;
36126 if (has_immediate (insn))
36127 return disp_imm;
36129 if (is_prefetch (insn))
36130 return disp_prefetch;
36132 return disp_no_group;
36133 }
36135 /* Count number of GROUP restricted instructions in a dispatch
36136 window WINDOW_LIST. */
36138 static int
36139 count_num_restricted (rtx insn, dispatch_windows *window_list)
36140 {
36141 enum dispatch_group group = get_insn_group (insn);
36142 int imm_size;
36143 int num_imm_operand;
36144 int num_imm32_operand;
36145 int num_imm64_operand;
36147 if (group == disp_no_group)
36148 return 0;
36150 if (group == disp_imm)
36151 {
36152 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
36153 &num_imm64_operand);
36154 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
36155 || num_imm_operand + window_list->num_imm > MAX_IMM
36156 || (num_imm32_operand > 0
36157 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
36158 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
36159 || (num_imm64_operand > 0
36160 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
36161 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
36162 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
36163 && num_imm64_operand > 0
36164 && ((window_list->num_imm_64 > 0
36165 && window_list->num_insn >= 2)
36166 || window_list->num_insn >= 3)))
36167 return BIG;
36169 return 1;
36170 }
36172 if ((group == disp_load_store
36173 && (window_list->num_loads >= MAX_LOAD
36174 || window_list->num_stores >= MAX_STORE))
36175 || ((group == disp_load
36176 || group == disp_prefetch)
36177 && window_list->num_loads >= MAX_LOAD)
36178 || (group == disp_store
36179 && window_list->num_stores >= MAX_STORE))
36180 return BIG;
36182 return 1;
36183 }
36185 /* This function returns true if insn satisfies dispatch rules on the
36186 last window scheduled. */
36188 static bool
36189 fits_dispatch_window (rtx insn)
36190 {
36191 dispatch_windows *window_list = dispatch_window_list;
36192 dispatch_windows *window_list_next = dispatch_window_list->next;
36193 unsigned int num_restrict;
36194 enum dispatch_group group = get_insn_group (insn);
36195 enum insn_path path = get_insn_path (insn);
36196 int sum;
36198 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
36199 instructions should be given the lowest priority in the
36200 scheduling process in Haifa scheduler to make sure they will be
36201 scheduled in the same dispatch window as the reference to them. */
36202 if (group == disp_jcc || group == disp_cmp)
36203 return false;
36205 /* Check nonrestricted. */
36206 if (group == disp_no_group || group == disp_branch)
36207 return true;
36209 /* Get last dispatch window. */
36210 if (window_list_next)
36211 window_list = window_list_next;
36213 if (window_list->window_num == 1)
36214 {
36215 sum = window_list->prev->window_size + window_list->window_size;
36217 if (sum == 32
36218 || (min_insn_size (insn) + sum) >= 48)
36219 /* Window 1 is full. Go for next window. */
36220 return true;
36221 }
36223 num_restrict = count_num_restricted (insn, window_list);
36225 if (num_restrict > num_allowable_groups[group])
36226 return false;
36228 /* See if it fits in the first window. */
36229 if (window_list->window_num == 0)
36230 {
36231 /* The first window should have only single and double path
36232 uops. */
36233 if (path == path_double
36234 && (window_list->num_uops + 2) > MAX_INSN)
36235 return false;
36236 else if (path != path_single)
36237 return false;
36238 }
36239 return true;
36240 }
36242 /* Add an instruction INSN with NUM_UOPS micro-operations to the
36243 dispatch window WINDOW_LIST. */
36245 static void
36246 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
36247 {
36248 int byte_len = min_insn_size (insn);
36249 int num_insn = window_list->num_insn;
36250 int imm_size;
36251 sched_insn_info *window = window_list->window;
36252 enum dispatch_group group = get_insn_group (insn);
36253 enum insn_path path = get_insn_path (insn);
36254 int num_imm_operand;
36255 int num_imm32_operand;
36256 int num_imm64_operand;
36258 if (!window_list->violation && group != disp_cmp
36259 && !fits_dispatch_window (insn))
36260 window_list->violation = true;
36262 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
36263 &num_imm64_operand);
36265 /* Initialize window with new instruction. */
36266 window[num_insn].insn = insn;
36267 window[num_insn].byte_len = byte_len;
36268 window[num_insn].group = group;
36269 window[num_insn].path = path;
36270 window[num_insn].imm_bytes = imm_size;
36272 window_list->window_size += byte_len;
36273 window_list->num_insn = num_insn + 1;
36274 window_list->num_uops = window_list->num_uops + num_uops;
36275 window_list->imm_size += imm_size;
36276 window_list->num_imm += num_imm_operand;
36277 window_list->num_imm_32 += num_imm32_operand;
36278 window_list->num_imm_64 += num_imm64_operand;
36280 if (group == disp_store)
36281 window_list->num_stores += 1;
36282 else if (group == disp_load
36283 || group == disp_prefetch)
36284 window_list->num_loads += 1;
36285 else if (group == disp_load_store)
36286 {
36287 window_list->num_stores += 1;
36288 window_list->num_loads += 1;
36289 }
36290 }
36292 /* Adds a scheduled instruction, INSN, to the current dispatch window.
36293 If the total bytes of instructions or the number of instructions in
36294 the window exceeds the allowable limit, it allocates a new window. */
36296 static void
36297 add_to_dispatch_window (rtx insn)
36298 {
36299 int byte_len;
36300 dispatch_windows *window_list;
36301 dispatch_windows *next_list;
36302 dispatch_windows *window0_list;
36303 enum insn_path path;
36304 enum dispatch_group insn_group;
36305 bool insn_fits;
36306 int num_insn;
36307 int num_uops;
36308 int window_num;
36309 int insn_num_uops;
36310 int sum;
36312 if (INSN_CODE (insn) < 0)
36313 return;
36315 byte_len = min_insn_size (insn);
36316 window_list = dispatch_window_list;
36317 next_list = window_list->next;
36318 path = get_insn_path (insn);
36319 insn_group = get_insn_group (insn);
36321 /* Get the last dispatch window. */
36322 if (next_list)
36323 window_list = dispatch_window_list->next;
36325 if (path == path_single)
36326 insn_num_uops = 1;
36327 else if (path == path_double)
36328 insn_num_uops = 2;
36329 else
36330 insn_num_uops = (int) path;
36332 /* If current window is full, get a new window.
36333 Window number zero is full, if MAX_INSN uops are scheduled in it.
36334 Window number one is full, if window zero's bytes plus window
36335 one's bytes is 32, or if the bytes of the new instruction added
36336 to the total makes it 48 or more, or if it already has MAX_INSN
36337 instructions in it. */
36338 num_insn = window_list->num_insn;
36339 num_uops = window_list->num_uops;
36340 window_num = window_list->window_num;
36341 insn_fits = fits_dispatch_window (insn);
36343 if (num_insn >= MAX_INSN
36344 || num_uops + insn_num_uops > MAX_INSN
36345 || !insn_fits)
36346 {
36347 window_num = ~window_num & 1;
36348 window_list = allocate_next_window (window_num);
36349 }
36351 if (window_num == 0)
36352 {
36353 add_insn_window (insn, window_list, insn_num_uops);
36354 if (window_list->num_insn >= MAX_INSN
36355 && insn_group == disp_branch)
36356 {
36357 process_end_window ();
36358 return;
36359 }
36360 }
36361 else if (window_num == 1)
36362 {
36363 window0_list = window_list->prev;
36364 sum = window0_list->window_size + window_list->window_size;
36365 if (sum == 32
36366 || (byte_len + sum) >= 48)
36367 {
36368 process_end_window ();
36369 window_list = dispatch_window_list;
36370 }
36372 add_insn_window (insn, window_list, insn_num_uops);
36373 }
36374 else
36375 gcc_unreachable ();
36377 if (is_end_basic_block (insn_group))
36378 {
36379 /* End of basic block is reached; do end-basic-block process. */
36380 process_end_window ();
36381 return;
36382 }
36383 }
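/* Worked example of the rules enforced above: insns accumulate in window 0
   until it holds MAX_INSN uops or ends in a branch; window 1 is then
   chained after it.  The pair is flushed by process_end_window once the two
   windows total exactly 32 bytes, once a new insn would bring the total to
   48 bytes or more, or at the end of a basic block, and filling restarts at
   window 0.  */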
36385 /* Print the dispatch window, WINDOW_NUM, to FILE. */
36387 DEBUG_FUNCTION static void
36388 debug_dispatch_window_file (FILE *file, int window_num)
36389 {
36390 dispatch_windows *list;
36391 int i;
36393 if (window_num == 0)
36394 list = dispatch_window_list;
36395 else
36396 list = dispatch_window_list1;
36398 fprintf (file, "Window #%d:\n", list->window_num);
36399 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
36400 list->num_insn, list->num_uops, list->window_size);
36401 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
36402 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
36404 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
36406 fprintf (file, " insn info:\n");
36408 for (i = 0; i < MAX_INSN; i++)
36409 {
36410 if (!list->window[i].insn)
36411 break;
36412 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
36413 i, group_name[list->window[i].group],
36414 i, (void *)list->window[i].insn,
36415 i, list->window[i].path,
36416 i, list->window[i].byte_len,
36417 i, list->window[i].imm_bytes);
36418 }
36419 }
36421 /* Print to stdout a dispatch window. */
36423 DEBUG_FUNCTION void
36424 debug_dispatch_window (int window_num)
36425 {
36426 debug_dispatch_window_file (stdout, window_num);
36427 }
36429 /* Print INSN dispatch information to FILE. */
36431 DEBUG_FUNCTION static void
36432 debug_insn_dispatch_info_file (FILE *file, rtx insn)
36433 {
36434 int byte_len;
36435 enum insn_path path;
36436 enum dispatch_group group;
36437 int imm_size;
36438 int num_imm_operand;
36439 int num_imm32_operand;
36440 int num_imm64_operand;
36442 if (INSN_CODE (insn) < 0)
36443 return;
36445 byte_len = min_insn_size (insn);
36446 path = get_insn_path (insn);
36447 group = get_insn_group (insn);
36448 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
36449 &num_imm64_operand);
36451 fprintf (file, " insn info:\n");
36452 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
36453 group_name[group], path, byte_len);
36454 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
36455 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
36456 }
36458 /* Print to stdout the status of the ready list with respect to
36459 dispatch windows. */
36461 DEBUG_FUNCTION void
36462 debug_ready_dispatch (void)
36463 {
36464 int i;
36465 int no_ready = number_in_ready ();
36467 fprintf (stdout, "Number of ready: %d\n", no_ready);
36469 for (i = 0; i < no_ready; i++)
36470 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
36471 }
36473 /* This routine is the driver of the dispatch scheduler. */
36475 static void
36476 do_dispatch (rtx insn, int mode)
36477 {
36478 if (mode == DISPATCH_INIT)
36479 init_dispatch_sched ();
36480 else if (mode == ADD_TO_DISPATCH_WINDOW)
36481 add_to_dispatch_window (insn);
36482 }
36484 /* Return TRUE if Dispatch Scheduling is supported. */
36486 static bool
36487 has_dispatch (rtx insn, int action)
36488 {
36489 if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
36490 && flag_dispatch_scheduler)
36491 switch (action)
36492 {
36493 default:
36494 return false;
36496 case IS_DISPATCH_ON:
36497 return true;
36498 break;
36500 case IS_CMP:
36501 return is_cmp (insn);
36503 case DISPATCH_VIOLATION:
36504 return dispatch_violation ();
36506 case FITS_DISPATCH_WINDOW:
36507 return fits_dispatch_window (insn);
36508 }
36510 return false;
36511 }
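/* Illustrative sketch only; the real driver is the scheduler glue elsewhere
   in GCC, which queries these entry points roughly as follows.  */
#if 0
static void
dispatch_function_sketch (rtx first_insn)
{
  rtx insn;

  /* Build the dispatch tables and the first window, then feed each
     scheduled insn into the current window.  */
  do_dispatch (NULL_RTX, DISPATCH_INIT);
  for (insn = first_insn; insn; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn) && has_dispatch (insn, IS_DISPATCH_ON))
      do_dispatch (insn, ADD_TO_DISPATCH_WINDOW);
}
#endif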
36513 /* Implementation of reassociation_width target hook used by
36514 reassoc phase to identify parallelism level in reassociated
36515 tree. Statements tree_code is passed in OPC. Arguments type
36516 is passed in MODE.
36518 Currently parallel reassociation is enabled for Atom
36519 processors only and we set reassociation width to be 2
36520 because Atom may issue up to 2 instructions per cycle.
36522 Return value should be fixed if parallel reassociation is
36523 enabled for other processors. */
36525 static int
36526 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
36527 enum machine_mode mode)
36528 {
36529 int res = 1;
36531 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
36532 res = 2;
36533 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
36534 res = 2;
36536 return res;
36537 }
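/* For example, with a width of 2 the reassociation pass may rewrite
   ((a + b) + c) + d as (a + b) + (c + d), exposing two independent
   additions that can issue in the same cycle.  */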
36539 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
36540 place emms and femms instructions. */
36542 static enum machine_mode
36543 ix86_preferred_simd_mode (enum machine_mode mode)
36544 {
36545 if (!TARGET_SSE)
36546 return word_mode;
36548 switch (mode)
36549 {
36550 case QImode:
36551 return TARGET_AVX2 ? V32QImode : V16QImode;
36552 case HImode:
36553 return TARGET_AVX2 ? V16HImode : V8HImode;
36554 case SImode:
36555 return TARGET_AVX2 ? V8SImode : V4SImode;
36556 case DImode:
36557 return TARGET_AVX2 ? V4DImode : V2DImode;
36559 case SFmode:
36560 if (TARGET_AVX && !TARGET_PREFER_AVX128)
36561 return V8SFmode;
36562 else
36563 return V4SFmode;
36565 case DFmode:
36566 if (!TARGET_VECTORIZE_DOUBLE)
36567 return word_mode;
36568 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
36569 return V4DFmode;
36570 else if (TARGET_SSE2)
36571 return V2DFmode;
36572 return word_mode;
36574 default:
36575 return word_mode;
36576 }
36577 }
36579 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
36580 vectors. */
36582 static unsigned int
36583 ix86_autovectorize_vector_sizes (void)
36584 {
36585 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
36586 }
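/* The value is a bit mask of vector sizes in bytes: 32 | 16 asks the
   vectorizer to try 256-bit vectors and fall back to 128-bit ones, while 0
   means only the mode from ix86_preferred_simd_mode is tried.  */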
36588 /* Initialize the GCC target structure. */
36589 #undef TARGET_RETURN_IN_MEMORY
36590 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
36592 #undef TARGET_LEGITIMIZE_ADDRESS
36593 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
36595 #undef TARGET_ATTRIBUTE_TABLE
36596 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
36597 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
36598 # undef TARGET_MERGE_DECL_ATTRIBUTES
36599 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
36600 #endif
36602 #undef TARGET_COMP_TYPE_ATTRIBUTES
36603 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
36605 #undef TARGET_INIT_BUILTINS
36606 #define TARGET_INIT_BUILTINS ix86_init_builtins
36607 #undef TARGET_BUILTIN_DECL
36608 #define TARGET_BUILTIN_DECL ix86_builtin_decl
36609 #undef TARGET_EXPAND_BUILTIN
36610 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
36612 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
36613 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
36614 ix86_builtin_vectorized_function
36616 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
36617 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
36619 #undef TARGET_BUILTIN_RECIPROCAL
36620 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
36622 #undef TARGET_ASM_FUNCTION_EPILOGUE
36623 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
36625 #undef TARGET_ENCODE_SECTION_INFO
36626 #ifndef SUBTARGET_ENCODE_SECTION_INFO
36627 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
36628 #else
36629 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
36630 #endif
36632 #undef TARGET_ASM_OPEN_PAREN
36633 #define TARGET_ASM_OPEN_PAREN ""
36634 #undef TARGET_ASM_CLOSE_PAREN
36635 #define TARGET_ASM_CLOSE_PAREN ""
36637 #undef TARGET_ASM_BYTE_OP
36638 #define TARGET_ASM_BYTE_OP ASM_BYTE
36640 #undef TARGET_ASM_ALIGNED_HI_OP
36641 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
36642 #undef TARGET_ASM_ALIGNED_SI_OP
36643 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
36644 #ifdef ASM_QUAD
36645 #undef TARGET_ASM_ALIGNED_DI_OP
36646 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
36647 #endif
36649 #undef TARGET_PROFILE_BEFORE_PROLOGUE
36650 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
36652 #undef TARGET_ASM_UNALIGNED_HI_OP
36653 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
36654 #undef TARGET_ASM_UNALIGNED_SI_OP
36655 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
36656 #undef TARGET_ASM_UNALIGNED_DI_OP
36657 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
36659 #undef TARGET_PRINT_OPERAND
36660 #define TARGET_PRINT_OPERAND ix86_print_operand
36661 #undef TARGET_PRINT_OPERAND_ADDRESS
36662 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
36663 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
36664 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
36665 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
36666 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
36668 #undef TARGET_SCHED_INIT_GLOBAL
36669 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
36670 #undef TARGET_SCHED_ADJUST_COST
36671 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
36672 #undef TARGET_SCHED_ISSUE_RATE
36673 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
36674 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
36675 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
36676 ia32_multipass_dfa_lookahead
36678 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
36679 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
36681 #ifdef HAVE_AS_TLS
36682 #undef TARGET_HAVE_TLS
36683 #define TARGET_HAVE_TLS true
36684 #endif
36685 #undef TARGET_CANNOT_FORCE_CONST_MEM
36686 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
36687 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
36688 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
36690 #undef TARGET_DELEGITIMIZE_ADDRESS
36691 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
36693 #undef TARGET_MS_BITFIELD_LAYOUT_P
36694 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
36696 #if TARGET_MACHO
36697 #undef TARGET_BINDS_LOCAL_P
36698 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
36699 #endif
36700 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
36701 #undef TARGET_BINDS_LOCAL_P
36702 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
36703 #endif
36705 #undef TARGET_ASM_OUTPUT_MI_THUNK
36706 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
36707 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
36708 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
36710 #undef TARGET_ASM_FILE_START
36711 #define TARGET_ASM_FILE_START x86_file_start
36713 #undef TARGET_OPTION_OVERRIDE
36714 #define TARGET_OPTION_OVERRIDE ix86_option_override
36716 #undef TARGET_REGISTER_MOVE_COST
36717 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
36718 #undef TARGET_MEMORY_MOVE_COST
36719 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
36720 #undef TARGET_RTX_COSTS
36721 #define TARGET_RTX_COSTS ix86_rtx_costs
36722 #undef TARGET_ADDRESS_COST
36723 #define TARGET_ADDRESS_COST ix86_address_cost
36725 #undef TARGET_FIXED_CONDITION_CODE_REGS
36726 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
36727 #undef TARGET_CC_MODES_COMPATIBLE
36728 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
36730 #undef TARGET_MACHINE_DEPENDENT_REORG
36731 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
36733 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
36734 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
36736 #undef TARGET_BUILD_BUILTIN_VA_LIST
36737 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
36739 #undef TARGET_ENUM_VA_LIST_P
36740 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
36742 #undef TARGET_FN_ABI_VA_LIST
36743 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
36745 #undef TARGET_CANONICAL_VA_LIST_TYPE
36746 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
36748 #undef TARGET_EXPAND_BUILTIN_VA_START
36749 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
36751 #undef TARGET_MD_ASM_CLOBBERS
36752 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
36754 #undef TARGET_PROMOTE_PROTOTYPES
36755 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
36756 #undef TARGET_STRUCT_VALUE_RTX
36757 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
36758 #undef TARGET_SETUP_INCOMING_VARARGS
36759 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
36760 #undef TARGET_MUST_PASS_IN_STACK
36761 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
36762 #undef TARGET_FUNCTION_ARG_ADVANCE
36763 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
36764 #undef TARGET_FUNCTION_ARG
36765 #define TARGET_FUNCTION_ARG ix86_function_arg
36766 #undef TARGET_FUNCTION_ARG_BOUNDARY
36767 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
36768 #undef TARGET_PASS_BY_REFERENCE
36769 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
36770 #undef TARGET_INTERNAL_ARG_POINTER
36771 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
36772 #undef TARGET_UPDATE_STACK_BOUNDARY
36773 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
36774 #undef TARGET_GET_DRAP_RTX
36775 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
36776 #undef TARGET_STRICT_ARGUMENT_NAMING
36777 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
36778 #undef TARGET_STATIC_CHAIN
36779 #define TARGET_STATIC_CHAIN ix86_static_chain
36780 #undef TARGET_TRAMPOLINE_INIT
36781 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
36782 #undef TARGET_RETURN_POPS_ARGS
36783 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
36785 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
36786 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
36788 #undef TARGET_SCALAR_MODE_SUPPORTED_P
36789 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
36791 #undef TARGET_VECTOR_MODE_SUPPORTED_P
36792 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
36794 #undef TARGET_C_MODE_FOR_SUFFIX
36795 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
36797 #ifdef HAVE_AS_TLS
36798 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
36799 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
36800 #endif
36802 #ifdef SUBTARGET_INSERT_ATTRIBUTES
36803 #undef TARGET_INSERT_ATTRIBUTES
36804 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
36805 #endif
36807 #undef TARGET_MANGLE_TYPE
36808 #define TARGET_MANGLE_TYPE ix86_mangle_type
36810 #ifndef TARGET_MACHO
36811 #undef TARGET_STACK_PROTECT_FAIL
36812 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
36813 #endif
36815 #undef TARGET_FUNCTION_VALUE
36816 #define TARGET_FUNCTION_VALUE ix86_function_value
36818 #undef TARGET_FUNCTION_VALUE_REGNO_P
36819 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
36821 #undef TARGET_PROMOTE_FUNCTION_MODE
36822 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
36824 #undef TARGET_SECONDARY_RELOAD
36825 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
36827 #undef TARGET_CLASS_MAX_NREGS
36828 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
36830 #undef TARGET_PREFERRED_RELOAD_CLASS
36831 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
36832 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
36833 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
36834 #undef TARGET_CLASS_LIKELY_SPILLED_P
36835 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
36837 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
36838 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
36839 ix86_builtin_vectorization_cost
36840 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
36841 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
36842 ix86_vectorize_builtin_vec_perm
36843 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
36844 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
36845 ix86_vectorize_builtin_vec_perm_ok
36846 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
36847 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
36848 ix86_preferred_simd_mode
36849 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
36850 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
36851 ix86_autovectorize_vector_sizes
36853 #undef TARGET_SET_CURRENT_FUNCTION
36854 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
36856 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
36857 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
36859 #undef TARGET_OPTION_SAVE
36860 #define TARGET_OPTION_SAVE ix86_function_specific_save
36862 #undef TARGET_OPTION_RESTORE
36863 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
36865 #undef TARGET_OPTION_PRINT
36866 #define TARGET_OPTION_PRINT ix86_function_specific_print
36868 #undef TARGET_CAN_INLINE_P
36869 #define TARGET_CAN_INLINE_P ix86_can_inline_p
36871 #undef TARGET_EXPAND_TO_RTL_HOOK
36872 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
36874 #undef TARGET_LEGITIMATE_ADDRESS_P
36875 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
36877 #undef TARGET_LEGITIMATE_CONSTANT_P
36878 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
36880 #undef TARGET_FRAME_POINTER_REQUIRED
36881 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
36883 #undef TARGET_CAN_ELIMINATE
36884 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
36886 #undef TARGET_EXTRA_LIVE_ON_ENTRY
36887 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
36889 #undef TARGET_ASM_CODE_END
36890 #define TARGET_ASM_CODE_END ix86_code_end
36892 #undef TARGET_CONDITIONAL_REGISTER_USAGE
36893 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
36895 #if TARGET_MACHO
36896 #undef TARGET_INIT_LIBFUNCS
36897 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
36898 #endif
36900 struct gcc_target targetm = TARGET_INITIALIZER;
36902 #include "gt-i386.h"